Commit a67c6565 authored by qlobbe's avatar qlobbe

add new branch tagger

parent f0ce268d
Pipeline #1077 failed with stage
...@@ -148,7 +148,7 @@ defaultConfig = ...@@ -148,7 +148,7 @@ defaultConfig =
, phyloQuality = Quality 100 1 , phyloQuality = Quality 100 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = MaxClique 0 , clique = MaxClique 0
, exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy , exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2] , exportFilter = [ByBranchSize 2]
} }
...@@ -368,7 +368,7 @@ data Order = Asc | Desc deriving (Show,Generic,Eq) ...@@ -368,7 +368,7 @@ data Order = Asc | Desc deriving (Show,Generic,Eq)
data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq) data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq) data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
data PhyloLabel = data PhyloLabel =
BranchLabel BranchLabel
......
...@@ -46,7 +46,7 @@ cooc2graph' distance threshold myCooc = distanceMap ...@@ -46,7 +46,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where where
(ti, _) = createIndices myCooc (ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc' matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
...@@ -12,7 +12,7 @@ Portability : POSIX ...@@ -12,7 +12,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloExport where module Gargantext.Core.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList) import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex) import Data.List ((++), sort, nub, null, concat, sortOn, reverse, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -395,6 +395,52 @@ processMetrics export = ngramsMetrics ...@@ -395,6 +395,52 @@ processMetrics export = ngramsMetrics
-- | Taggers | -- -- | Taggers | --
----------------- -----------------
nk :: Int -> [[Int]] -> Int
nk n groups = sum
$ map (\g -> if (elem n g)
then 1
else 0) groups
tf :: Int -> [[Int]] -> Double
tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
idf :: Int -> [[Int]] -> Double
idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
findTfIdf :: [[Int]] -> [(Int,Double)]
findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
findEmergences groups freq =
let ngrams = map _phylo_groupNgrams groups
dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
$ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
in reverse $ sortOn snd
$ map (\(n,_) -> if (member n freq)
then (n,freq ! n)
else (n,0)) emerging
mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentTfIdf nth freq foundations export =
over ( export_branches
. traverse )
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
tfidf = findTfIdf (map _phylo_groupNgrams groups)
emergences = findEmergences groups freq
selected = if (null emergences)
then map fst $ take nth tfidf
else [fst $ head' "mostEmergentTfIdf" emergences]
++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
in b & branch_label .~ (ngramsToLabel foundations selected)) export
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int] getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx)) getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth $ take nth
...@@ -431,8 +477,8 @@ mostEmergentInclusive nth foundations export = ...@@ -431,8 +477,8 @@ mostEmergentInclusive nth foundations export =
in g & phylo_groupLabel .~ lbl ) export in g & phylo_groupLabel .~ lbl ) export
processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
processLabels labels foundations export = processLabels labels foundations freq export =
foldl (\export' label -> foldl (\export' label ->
case label of case label of
GroupLabel tagger nth -> GroupLabel tagger nth ->
...@@ -442,6 +488,7 @@ processLabels labels foundations export = ...@@ -442,6 +488,7 @@ processLabels labels foundations export =
BranchLabel tagger nth -> BranchLabel tagger nth ->
case tagger of case tagger of
MostInclusive -> mostInclusive nth foundations export' MostInclusive -> mostInclusive nth foundations export'
MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels _ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
...@@ -458,7 +505,7 @@ toDynamics n parents g m = ...@@ -458,7 +505,7 @@ toDynamics n parents g m =
{- decrease -} {- decrease -}
then 2 then 2
else if ((fst prd) == (fst $ m ! n)) else if ((fst prd) == (fst $ m ! n))
{- recombination -} {- emerging -}
then 0 then 0
else if isNew else if isNew
{- emergence -} {- emergence -}
...@@ -571,7 +618,7 @@ toPhyloExport :: Phylo -> DotGraph DotId ...@@ -571,7 +618,7 @@ toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo) $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) $ processSort (exportSort $ getConfig phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics export $ processMetrics export
where where
export :: PhyloExport export :: PhyloExport
......
...@@ -202,7 +202,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -202,7 +202,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0 cooc)) in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.1 cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq mcl' = mcl `using` parList rdeepseq
in fromList mcl' in fromList mcl'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment