Commit 2a7a57c8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PHYLO] merge

parents eb88099c 27c82dbe
......@@ -26,7 +26,7 @@ import System.Directory (doesFileExist)
import Data.Aeson
import Data.Text (Text, unwords, unlines)
import Data.List ((++))
import Data.List ((++),concat)
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude
......@@ -37,6 +37,8 @@ import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Context (TermList)
import Control.Monad (mapM)
import System.Environment
import Gargantext.Viz.Phylo
......@@ -48,6 +50,8 @@ import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node
import Data.Maybe
import Control.Concurrent.Async as CCA (mapConcurrently)
import qualified Data.Map as DM
import qualified Data.Vector as DV
import qualified Data.List as DL
......@@ -141,7 +145,8 @@ wosToCorpus limit path = DL.take limit
. filter (\d -> (isJust $_hyperdataDocument_publication_year d)
&& (isJust $_hyperdataDocument_title d)
&& (isJust $_hyperdataDocument_abstract d))
<$> parseFile WOS path
. concat
<$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20]
-- | To use the correct parser given a CorpusType
......@@ -211,7 +216,7 @@ main = do
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
let phylo = toPhylo query corpus termList fis'
......
......@@ -198,7 +198,7 @@ type Ngrams = Text
data Document = Document
{ date :: Date
, text :: [Ngrams]
} deriving (Show,Generic)
} deriving (Show,Generic,NFData)
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
......@@ -209,7 +209,7 @@ data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_period :: (Date,Date)
} deriving (Generic,Show,Eq)
} deriving (Generic,NFData,Show,Eq)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
......@@ -309,7 +309,7 @@ data SBParams = SBParams
-- | Metric constructors
data Metric = BranchAge deriving (Generic, Show, Eq, Read)
data Metric = BranchAge | BranchBirth deriving (Generic, Show, Eq, Read)
----------------
......@@ -318,7 +318,8 @@ data Metric = BranchAge deriving (Generic, Show, Eq, Read)
-- | Tagger constructors
data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
| GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
--------------
......@@ -327,7 +328,7 @@ data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Gen
-- | Sort constructors
data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
data Order = Asc | Desc deriving (Generic, Show, Read)
......@@ -408,6 +409,7 @@ data PhyloNode = PhyloNode
, _pn_idx :: [Int]
, _pn_ngrams :: Maybe [Ngrams]
, _pn_metrics :: Map Text [Double]
, _pn_cooc :: Map (Int,Int) Double
, _pn_parents :: Maybe [PhyloGroupId]
, _pn_childs :: [PhyloNode]
} deriving (Generic, Show)
......
......@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates
where
import Control.Lens hiding (makeLenses, both, Level)
import Control.Parallel.Strategies
import Gargantext.Prelude hiding (elem)
import Gargantext.Text.Context (TermList)
......@@ -29,12 +28,15 @@ import Gargantext.Viz.Phylo.Tools
import Debug.Trace (trace)
import Data.List (partition, concat, nub, elem, sort, (++), null)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, toList, elems, keys, unionWith, mapWithKey)
import Data.List (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set (size)
import Data.Text (Text, unwords)
import Data.Vector (Vector)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
......@@ -53,9 +55,13 @@ termListToNgrams = map (\(lbl,_) -> unwords lbl)
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ fromList $ zip pds $ map (inPeriode f es) pds
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
......@@ -161,28 +167,45 @@ filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) []
in filter (\fis -> elem (getClique fis) cliqueMax) l)
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
$ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
$ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)
-- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
......
......@@ -28,6 +28,9 @@ TODO:
module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Control.Lens hiding (both, Level)
import Data.Text (Text, toLower)
import Data.List ((++))
import Data.Map (Map,empty)
......@@ -43,16 +46,23 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Main (writePhylo)
import GHC.IO (FilePath)
import qualified Data.List as List
------------------------------------------------------
-- | STEP 12 | -- Create a PhyloView from a user Query
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
phyloExport :: FilePath -> IO FilePath
phyloExport fp = writePhylo fp phyloView
......@@ -73,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge,BranchBirth] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
--------------------------------------------------
......@@ -100,7 +110,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.5 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
......@@ -196,12 +206,11 @@ phylo1 = addPhyloLevel (1) phyloFis phylo'
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
-------------------------------------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (getPhyloFis phylo') True 1 1
phylo' :: Phylo
phylo' = docsToFis phyloDocs phylo
phylo' = phylo & phylo_fis .~ phyloFis
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (docsToFis phyloDocs phylo) True 1 1
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
......
......@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last)
import Data.List ((++), sort, concat, nub, zip, last, null)
import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text)
import Data.Tuple.Extra
......@@ -187,11 +187,12 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
-- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
--------------------------------------
phylo0 :: Phylo
phylo0 = tracePhyloN 0
$ addPhyloLevel 0 phyloDocs phyloBase
-- phylo0 :: Phylo
-- phylo0 = tracePhyloN 0
-- $ addPhyloLevel 0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
......@@ -236,15 +237,14 @@ toPhylo1 clus prox d p = case clus of
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
$ setLevelLinks (0,1)
$ addPhyloLevel 1 phyloFis phylo'
-- $ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis) phyloFis
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (getPhyloFis phylo') k s t
--------------------------------------
phylo' :: Phylo
phylo' = docsToFis d p
phyloFis :: Phylo
phyloFis = if (null $ getPhyloFis p)
then p & phylo_fis .~ refineFis (docsToFis d p) k s t
else p & phylo_fis .~ docsToFis d p
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
......
......@@ -227,10 +227,9 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterPhyloGroups
(\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
then
let groups = map (\g -> g & phylo_groupPeriodParents .~ (trackPointers (reduceGroups g lvlGroups)
$ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers (reduceGroups g lvlGroups)
$ g ^. phylo_groupPeriodChilds )) gs
let groups = map (\g -> let m = reduceGroups g lvlGroups
in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
groups' = groups `using` parList rdeepseq
in groups'
else gs
......
......@@ -18,6 +18,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Main
where
import Data.GraphViz
import Data.Maybe
import Data.Text (Text)
......
......@@ -24,7 +24,7 @@ import Gargantext.Viz.Phylo.Tools
import Control.Lens hiding (Level)
import Data.List ((\\), sortOn, concat, nub, take, union, intersect, null, (++), sort)
import Data.Map (Map, (!), foldlWithKey, toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Map (Map, (!), toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Text (Text)
-- import Debug.Trace (trace)
......@@ -37,27 +37,25 @@ import Data.Text (Text)
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ foldlWithKey (\s (x,_) v -> if x == j
then s + v
else s ) 0 m
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / 2
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / 2
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the coverage score of a given ngram
coverage :: Map (Int, Int) Double -> [Int] -> Int -> Double
coverage m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / 2
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Process some metrics on top of ngrams
......@@ -65,7 +63,7 @@ getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("coverage" , map (\n -> coverage m (ngrams \\ [n]) n) ngrams )]
("inclusion" , map (\n -> inclusion m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix
......@@ -96,14 +94,14 @@ findDynamics n pv pn m =
bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease
then 0
else if ((fst prd) == (fst $ m ! n))
-- | emergence
then 1
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | recombination
then 2
else if ((fst prd) == (fst $ m ! n))
-- | recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | decrease
then 1
else 3
......
......@@ -23,7 +23,7 @@ import Data.GraphViz hiding (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++),unwords,concat,sortOn,nub,sort,group)
import Data.List ((++),unwords,concat,sortOn,nub)
import Data.Map (Map,toList,(!))
import Data.Maybe (isNothing,fromJust)
import Data.Text.Lazy (fromStrict, pack, unpack)
......@@ -134,16 +134,18 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea
colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d
| d == 0 = H.BGColor (toColor LightPink)
| d == 1 = H.BGColor (toColor PaleGreen)
| d == 2 = H.BGColor (toColor SkyBlue)
| d == 0 = H.BGColor (toColor PaleGreen)
| d == 1 = H.BGColor (toColor SkyBlue)
| d == 2 = H.BGColor (toColor LightPink)
| otherwise = H.Color (toColor Black)
getGroupDynamic :: [Double] -> H.Attribute
getGroupDynamic dy = colorFromDynamics $ head' "getGroupDynamic" (head' "getGroupDynamic" $ reverse $ sortOn length $ group $ sort dy)
getGroupDynamic dy
| elem 0 dy = colorFromDynamics 0
| elem 1 dy = colorFromDynamics 1
| elem 2 dy = colorFromDynamics 2
| otherwise = colorFromDynamics 3
-- | To set an HTML table
......@@ -151,21 +153,33 @@ setHtmlTable :: PhyloNode -> H.Label
setHtmlTable pn = H.Table H.HTable
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header] <> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4 $ zip (fromJust $ pn ^. pn_ngrams) dynamics) }
, H.tableRows = [header]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
<> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (fromJust $ pn ^. pn_ngrams) $ zip dynamics inclusion) }
where
--------------------------------------
ngramsToRow :: [(Ngrams,Double)] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,d) -> H.LabelCell [H.BAlign H.HLeft,colorFromDynamics d]
ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) -> H.LabelCell [H.Align H.HLeft,colorFromDynamics d]
$ H.Text [H.Str $ fromStrict n]) ns
--------------------------------------
inclusion :: [Double]
inclusion = (pn ^. pn_metrics) ! "inclusion"
--------------------------------------
dynamics :: [Double]
dynamics = (pn ^. pn_metrics) ! "dynamics"
--------------------------------------
header :: H.Row
header = H.Cells [H.LabelCell [getGroupDynamic dynamics]
$ H.Text [H.Str $ (fromStrict . T.toUpper) $ pn ^. pn_label]]
$ H.Text [H.Str $ (((fromStrict . T.toUpper) $ pn ^. pn_label)
<> (fromStrict " ( ")
<> (pack $ show (fst $ getNodePeriod pn))
<> (fromStrict " , ")
<> (pack $ show (snd $ getNodePeriod pn))
<> (fromStrict " ) "))]]
--------------------------------------
......
......@@ -47,10 +47,22 @@ branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
$ getNodesInBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchBirth :: PhyloView -> PhyloView
branchBirth v = foldl (\v' b -> let bId = (fst . (head' "branchBirth")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "birth" (fromIntegral $ fst $ head' "branchAge" prds) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To process a list of Metrics to a PhyloView
processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms
......
......@@ -36,10 +36,22 @@ sortBranchByAge o v = v & pv_branches %~ f
Desc -> reverse $ sortOn (getBranchMeta "age") xs
--------------------------------------
-- | To sort a PhyloView by Birth date of a branch
sortBranchByBirth :: Order -> PhyloView -> PhyloView
sortBranchByBirth o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "birth") xs
Desc -> reverse $ sortOn (getBranchMeta "birth") xs
--------------------------------------
-- | To process a Sort to a PhyloView
processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView
processSort s _p v = case s of
Nothing -> v
Just s' -> case fst s' of
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchBirth -> sortBranchByBirth (snd s') v
--_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found"
......@@ -18,16 +18,18 @@ module Gargantext.Viz.Phylo.View.Taggers
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take, union, (\\))
import Data.Text (Text)
import Data.Tuple (fst, snd)
import Data.Vector (Vector)
import Data.Map (Map, (!))
import Data.Map (Map, (!), empty, unionWith)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.Metrics
import qualified Data.Map as Map
import Control.Parallel.Strategies
-- import Debug.Trace (trace)
......@@ -82,26 +84,65 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getNodesByBranches v
getNthMostMeta :: Int -> Text -> PhyloGroup -> [Int]
getNthMostMeta nth meta g = map (\(idx,_) -> (getGroupNgrams g !! idx))
$ take nth
$ sortOn snd $ zip [0..]
$ (g ^. phylo_groupNgramsMeta) ! meta
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta thr "coverage" g
lbl = ngramsToLabel (getFoundationsRoots p) $ mostOccNgrams thr g
in n & pn_label .~ lbl) v
-- | To set the label of a PhyloNode as the nth most inclusives terms of its PhyloNodes
nodeLabelInc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "inclusion" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p)
$ getNthMostMeta thr ((g ^. phylo_groupNgramsMeta) ! "inclusion") (getGroupNgrams g)
in n & pn_label .~ lbl) v
nodeLabelInc' :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc' v nth p = over (pv_nodes
. traverse)
(\pn -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((pn ^. pn_metrics) ! "inclusion")
$ zip ((pn ^. pn_metrics) ! "dynamics") (pn ^. pn_idx)
in pn & pn_label .~ lbl) v
branchPeakInc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakInc v nth p =
let labels = map (\(id,nodes) ->
let cooc = foldl (\mem pn -> unionWith (+) mem (pn ^. pn_cooc)) empty nodes
ngrams = sort $ foldl (\mem pn -> union mem (pn ^. pn_idx)) [] nodes
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta nth inc ngrams
in (id, lbl))
$ getNodesByBranches v
labels' = labels `using` parList rdeepseq
in foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v labels'
-- | To process a sorted list of Taggers to a PhyloView
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
-- BranchPeakFreq -> branchPeakCooc v' 3 p
GroupLabelCooc -> nodeLabelCooc v' 2 p
_ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts
BranchPeakFreq -> branchPeakFreq v' 2 p
BranchPeakCooc -> branchPeakCooc v' 2 p
BranchPeakInc -> branchPeakInc v' 2 p
GroupLabelInc -> nodeLabelInc v' 2 p
GroupLabelIncDyn -> nodeLabelInc' v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p) v ts
......@@ -73,6 +73,7 @@ groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g
then Just (ngramsToText ns idxs)
else Nothing)
(g ^. phylo_groupNgramsMeta)
(g ^. phylo_groupCooc)
(if (not isR)
then Just (getGroupLevelParentsId g)
else Nothing)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment