Commit a67c6565 authored by qlobbe's avatar qlobbe

add new branch tagger

parent f0ce268d
Pipeline #1077 failed with stage
......@@ -148,7 +148,7 @@ defaultConfig =
, phyloQuality = Quality 100 1
, timeUnit = Year 3 1 5
, clique = MaxClique 0
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
}
......@@ -368,7 +368,7 @@ data Order = Asc | Desc deriving (Show,Generic,Eq)
data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
data PhyloLabel =
BranchLabel
......
......@@ -46,7 +46,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc'
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
......@@ -12,7 +12,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector)
......@@ -395,6 +395,52 @@ processMetrics export = ngramsMetrics
-- | Taggers | --
-----------------
nk :: Int -> [[Int]] -> Int
nk n groups = sum
$ map (\g -> if (elem n g)
then 1
else 0) groups
tf :: Int -> [[Int]] -> Double
tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
idf :: Int -> [[Int]] -> Double
idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
findTfIdf :: [[Int]] -> [(Int,Double)]
findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
findEmergences groups freq =
let ngrams = map _phylo_groupNgrams groups
dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
$ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
in reverse $ sortOn snd
$ map (\(n,_) -> if (member n freq)
then (n,freq ! n)
else (n,0)) emerging
mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentTfIdf nth freq foundations export =
over ( export_branches
. traverse )
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
tfidf = findTfIdf (map _phylo_groupNgrams groups)
emergences = findEmergences groups freq
selected = if (null emergences)
then map fst $ take nth tfidf
else [fst $ head' "mostEmergentTfIdf" emergences]
++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
in b & branch_label .~ (ngramsToLabel foundations selected)) export
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
......@@ -431,8 +477,8 @@ mostEmergentInclusive nth foundations export =
in g & phylo_groupLabel .~ lbl ) export
processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
processLabels labels foundations export =
processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
processLabels labels foundations freq export =
foldl (\export' label ->
case label of
GroupLabel tagger nth ->
......@@ -442,6 +488,7 @@ processLabels labels foundations export =
BranchLabel tagger nth ->
case tagger of
MostInclusive -> mostInclusive nth foundations export'
MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
......@@ -458,7 +505,7 @@ toDynamics n parents g m =
{- decrease -}
then 2
else if ((fst prd) == (fst $ m ! n))
{- recombination -}
{- emerging -}
then 0
else if isNew
{- emergence -}
......@@ -571,7 +618,7 @@ toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics export
where
export :: PhyloExport
......
......@@ -202,7 +202,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0 cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.1 cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment