Commit 2a7a57c8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PHYLO] merge

parents eb88099c 27c82dbe
...@@ -26,7 +26,7 @@ import System.Directory (doesFileExist) ...@@ -26,7 +26,7 @@ import System.Directory (doesFileExist)
import Data.Aeson import Data.Aeson
import Data.Text (Text, unwords, unlines) import Data.Text (Text, unwords, unlines)
import Data.List ((++)) import Data.List ((++),concat)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -37,6 +37,8 @@ import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile) ...@@ -37,6 +37,8 @@ import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Control.Monad (mapM)
import System.Environment import System.Environment
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
...@@ -48,6 +50,8 @@ import Gargantext.Viz.Phylo.View.ViewMaker ...@@ -48,6 +50,8 @@ import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Data.Maybe import Data.Maybe
import Control.Concurrent.Async as CCA (mapConcurrently)
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.List as DL import qualified Data.List as DL
...@@ -141,7 +145,8 @@ wosToCorpus limit path = DL.take limit ...@@ -141,7 +145,8 @@ wosToCorpus limit path = DL.take limit
. filter (\d -> (isJust $_hyperdataDocument_publication_year d) . filter (\d -> (isJust $_hyperdataDocument_publication_year d)
&& (isJust $_hyperdataDocument_title d) && (isJust $_hyperdataDocument_title d)
&& (isJust $_hyperdataDocument_abstract d)) && (isJust $_hyperdataDocument_abstract d))
<$> parseFile WOS path . concat
<$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20]
-- | To use the correct parser given a CorpusType -- | To use the correct parser given a CorpusType
...@@ -211,7 +216,7 @@ main = do ...@@ -211,7 +216,7 @@ main = do
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf) (reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf)) (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
let phylo = toPhylo query corpus termList fis' let phylo = toPhylo query corpus termList fis'
......
...@@ -198,7 +198,7 @@ type Ngrams = Text ...@@ -198,7 +198,7 @@ type Ngrams = Text
data Document = Document data Document = Document
{ date :: Date { date :: Date
, text :: [Ngrams] , text :: [Ngrams]
} deriving (Show,Generic) } deriving (Show,Generic,NFData)
-- | Clique : Set of ngrams cooccurring in the same Document -- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams type Clique = Set Ngrams
...@@ -209,7 +209,7 @@ data PhyloFis = PhyloFis ...@@ -209,7 +209,7 @@ data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique { _phyloFis_clique :: Clique
, _phyloFis_support :: Support , _phyloFis_support :: Support
, _phyloFis_period :: (Date,Date) , _phyloFis_period :: (Date,Date)
} deriving (Generic,Show,Eq) } deriving (Generic,NFData,Show,Eq)
-- | A list of clustered PhyloGroup -- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup] type PhyloCluster = [PhyloGroup]
...@@ -309,7 +309,7 @@ data SBParams = SBParams ...@@ -309,7 +309,7 @@ data SBParams = SBParams
-- | Metric constructors -- | Metric constructors
data Metric = BranchAge deriving (Generic, Show, Eq, Read) data Metric = BranchAge | BranchBirth deriving (Generic, Show, Eq, Read)
---------------- ----------------
...@@ -318,7 +318,8 @@ data Metric = BranchAge deriving (Generic, Show, Eq, Read) ...@@ -318,7 +318,8 @@ data Metric = BranchAge deriving (Generic, Show, Eq, Read)
-- | Tagger constructors -- | Tagger constructors
data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read) data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
| GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
-------------- --------------
...@@ -327,7 +328,7 @@ data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Gen ...@@ -327,7 +328,7 @@ data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Gen
-- | Sort constructors -- | Sort constructors
data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded) data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
data Order = Asc | Desc deriving (Generic, Show, Read) data Order = Asc | Desc deriving (Generic, Show, Read)
...@@ -408,6 +409,7 @@ data PhyloNode = PhyloNode ...@@ -408,6 +409,7 @@ data PhyloNode = PhyloNode
, _pn_idx :: [Int] , _pn_idx :: [Int]
, _pn_ngrams :: Maybe [Ngrams] , _pn_ngrams :: Maybe [Ngrams]
, _pn_metrics :: Map Text [Double] , _pn_metrics :: Map Text [Double]
, _pn_cooc :: Map (Int,Int) Double
, _pn_parents :: Maybe [PhyloGroupId] , _pn_parents :: Maybe [PhyloGroupId]
, _pn_childs :: [PhyloNode] , _pn_childs :: [PhyloNode]
} deriving (Generic, Show) } deriving (Generic, Show)
......
...@@ -17,8 +17,7 @@ Portability : POSIX ...@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates module Gargantext.Viz.Phylo.Aggregates
where where
import Control.Parallel.Strategies
import Control.Lens hiding (makeLenses, both, Level)
import Gargantext.Prelude hiding (elem) import Gargantext.Prelude hiding (elem)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
...@@ -29,12 +28,15 @@ import Gargantext.Viz.Phylo.Tools ...@@ -29,12 +28,15 @@ import Gargantext.Viz.Phylo.Tools
import Debug.Trace (trace) import Debug.Trace (trace)
import Data.List (partition, concat, nub, elem, sort, (++), null) import Data.List (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, toList, elems, keys, unionWith, mapWithKey) import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set (size) import Data.Set (size)
import Data.Text (Text, unwords) import Data.Text (Text, unwords)
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
...@@ -53,9 +55,13 @@ termListToNgrams = map (\(lbl,_) -> unwords lbl) ...@@ -53,9 +55,13 @@ termListToNgrams = map (\(lbl,_) -> unwords lbl)
------------------- -------------------
-- | To group a list of Documents by fixed periods -- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ fromList $ zip pds $ map (inPeriode f es) pds groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
...@@ -161,28 +167,45 @@ filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] ...@@ -161,28 +167,45 @@ filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis -- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ map getClique l) (map getClique l) [] filterFisByNested m =
in filter (\fis -> elem (getClique fis) cliqueMax) l) let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | Choose if we use a set of Fis from a file or if we have to create them -- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Phylo docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p) docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n") then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) $ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n") else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m)) $ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)
-- | Process some filters on top of a set of Fis -- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis] refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by clique size :\n" refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested $ filterFisByNested
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by support :\n" $ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport) $ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis $ traceFis "----\nUnfiltered Fis :\n" fis
......
...@@ -28,6 +28,9 @@ TODO: ...@@ -28,6 +28,9 @@ TODO:
module Gargantext.Viz.Phylo.Example where module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Control.Lens hiding (both, Level)
import Data.Text (Text, toLower) import Data.Text (Text, toLower)
import Data.List ((++)) import Data.List ((++))
import Data.Map (Map,empty) import Data.Map (Map,empty)
...@@ -43,16 +46,23 @@ import Gargantext.Viz.Phylo.LevelMaker ...@@ -43,16 +46,23 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.ViewMaker import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Main (writePhylo) import Gargantext.Viz.Phylo.Main (writePhylo)
import GHC.IO (FilePath) import GHC.IO (FilePath)
import qualified Data.List as List import qualified Data.List as List
------------------------------------------------------ ------------------------------------------------------
-- | STEP 12 | -- Create a PhyloView from a user Query -- | STEP 12 | -- Create a PhyloView from a user Query
------------------------------------------------------ ------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
phyloExport :: FilePath -> IO FilePath phyloExport :: FilePath -> IO FilePath
phyloExport fp = writePhylo fp phyloView phyloExport fp = writePhylo fp phyloView
...@@ -73,7 +83,7 @@ queryViewEx = "level=3" ...@@ -73,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge,BranchBirth] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
-------------------------------------------------- --------------------------------------------------
...@@ -100,7 +110,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -100,7 +110,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0) 3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.5 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
...@@ -196,12 +206,11 @@ phylo1 = addPhyloLevel (1) phyloFis phylo' ...@@ -196,12 +206,11 @@ phylo1 = addPhyloLevel (1) phyloFis phylo'
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them -- | STEP 5 | -- Create lists of Frequent Items Set and filter them
------------------------------------------------------------------- -------------------------------------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (getPhyloFis phylo') True 1 1
phylo' :: Phylo phylo' :: Phylo
phylo' = docsToFis phyloDocs phylo phylo' = phylo & phylo_fis .~ phyloFis
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (docsToFis phyloDocs phylo) True 1 1
---------------------------------------- ----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0 -- | STEP 2 | -- Init a Phylo of level 0
......
...@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker ...@@ -21,7 +21,7 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Parallel.Strategies import Control.Parallel.Strategies
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last) import Data.List ((++), sort, concat, nub, zip, last, null)
import Data.Map (Map, (!), empty, singleton) import Data.Map (Map, (!), empty, singleton)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra import Data.Tuple.Extra
...@@ -187,11 +187,12 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching ...@@ -187,11 +187,12 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching
where where
-------------------------------------- --------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0 phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
-- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
-------------------------------------- --------------------------------------
phylo0 :: Phylo -- phylo0 :: Phylo
phylo0 = tracePhyloN 0 -- phylo0 = tracePhyloN 0
$ addPhyloLevel 0 phyloDocs phyloBase -- $ addPhyloLevel 0 phyloDocs phyloBase
-------------------------------------- --------------------------------------
phyloDocs :: Map (Date, Date) [Document] phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
...@@ -236,15 +237,14 @@ toPhylo1 clus prox d p = case clus of ...@@ -236,15 +237,14 @@ toPhylo1 clus prox d p = case clus of
$ traceTempoMatching Ascendant 1 $ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox $ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1 $ tracePhyloN 1
$ setLevelLinks (0,1) -- $ setLevelLinks (0,1)
$ addPhyloLevel 1 phyloFis phylo' $ addPhyloLevel 1 (getPhyloFis phyloFis) phyloFis
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Phylo
phyloFis = refineFis (getPhyloFis phylo') k s t phyloFis = if (null $ getPhyloFis p)
-------------------------------------- then p & phylo_fis .~ refineFis (docsToFis d p) k s t
phylo' :: Phylo else p & phylo_fis .~ docsToFis d p
phylo' = docsToFis d p
-------------------------------------- --------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized" _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
......
...@@ -227,10 +227,9 @@ transposePeriodLinks :: Level -> Phylo -> Phylo ...@@ -227,10 +227,9 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterPhyloGroups transposePeriodLinks lvl p = alterPhyloGroups
(\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs) (\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
then then
let groups = map (\g -> g & phylo_groupPeriodParents .~ (trackPointers (reduceGroups g lvlGroups) let groups = map (\g -> let m = reduceGroups g lvlGroups
$ g ^. phylo_groupPeriodParents) in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers (reduceGroups g lvlGroups) & phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
$ g ^. phylo_groupPeriodChilds )) gs
groups' = groups `using` parList rdeepseq groups' = groups `using` parList rdeepseq
in groups' in groups'
else gs else gs
......
...@@ -18,6 +18,7 @@ Portability : POSIX ...@@ -18,6 +18,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Main module Gargantext.Viz.Phylo.Main
where where
import Data.GraphViz import Data.GraphViz
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
......
...@@ -24,7 +24,7 @@ import Gargantext.Viz.Phylo.Tools ...@@ -24,7 +24,7 @@ import Gargantext.Viz.Phylo.Tools
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Data.List ((\\), sortOn, concat, nub, take, union, intersect, null, (++), sort) import Data.List ((\\), sortOn, concat, nub, take, union, intersect, null, (++), sort)
import Data.Map (Map, (!), foldlWithKey, toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith) import Data.Map (Map, (!), toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Text (Text) import Data.Text (Text)
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
...@@ -37,27 +37,25 @@ import Data.Text (Text) ...@@ -37,27 +37,25 @@ import Data.Text (Text)
-- | Return the conditional probability of i knowing j -- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m) conditional m i j = (findWithDefault 0 (i,j) m)
/ foldlWithKey (\s (x,_) v -> if x == j / (m ! (j,j))
then s + v
else s ) 0 m
-- | Return the genericity score of a given ngram -- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l) genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / 2 - (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram -- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l) specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / 2 - (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the coverage score of a given ngram -- | Return the inclusion score of a given ngram
coverage :: Map (Int, Int) Double -> [Int] -> Int -> Double inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
coverage m l i = ( (sum $ map (\j -> conditional m j i) l) inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / 2 + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Process some metrics on top of ngrams -- | Process some metrics on top of ngrams
...@@ -65,7 +63,7 @@ getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double] ...@@ -65,7 +63,7 @@ getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ), [ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ), ("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("coverage" , map (\n -> coverage m (ngrams \\ [n]) n) ngrams )] ("inclusion" , map (\n -> inclusion m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix -- | To get the nth most occurent elems in a coocurency matrix
...@@ -96,14 +94,14 @@ findDynamics n pv pn m = ...@@ -96,14 +94,14 @@ findDynamics n pv pn m =
bid = fromJust $ (pn ^. pn_bid) bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease
then 0
else if ((fst prd) == (fst $ m ! n))
-- | emergence -- | emergence
then 1
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | recombination
then 2 then 2
else if ((fst prd) == (fst $ m ! n))
-- | recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | decrease
then 1
else 3 else 3
......
...@@ -23,7 +23,7 @@ import Data.GraphViz hiding (DotGraph) ...@@ -23,7 +23,7 @@ import Data.GraphViz hiding (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType) import Data.GraphViz.Attributes.Complete hiding (EdgeType)
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic import Data.GraphViz.Types.Monadic
import Data.List ((++),unwords,concat,sortOn,nub,sort,group) import Data.List ((++),unwords,concat,sortOn,nub)
import Data.Map (Map,toList,(!)) import Data.Map (Map,toList,(!))
import Data.Maybe (isNothing,fromJust) import Data.Maybe (isNothing,fromJust)
import Data.Text.Lazy (fromStrict, pack, unpack) import Data.Text.Lazy (fromStrict, pack, unpack)
...@@ -134,16 +134,18 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea ...@@ -134,16 +134,18 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea
colorFromDynamics :: Double -> H.Attribute colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d colorFromDynamics d
| d == 0 = H.BGColor (toColor LightPink) | d == 0 = H.BGColor (toColor PaleGreen)
| d == 1 = H.BGColor (toColor PaleGreen) | d == 1 = H.BGColor (toColor SkyBlue)
| d == 2 = H.BGColor (toColor SkyBlue) | d == 2 = H.BGColor (toColor LightPink)
| otherwise = H.Color (toColor Black) | otherwise = H.Color (toColor Black)
getGroupDynamic :: [Double] -> H.Attribute getGroupDynamic :: [Double] -> H.Attribute
getGroupDynamic dy = colorFromDynamics $ head' "getGroupDynamic" (head' "getGroupDynamic" $ reverse $ sortOn length $ group $ sort dy) getGroupDynamic dy
| elem 0 dy = colorFromDynamics 0
| elem 1 dy = colorFromDynamics 1
| elem 2 dy = colorFromDynamics 2
| otherwise = colorFromDynamics 3
-- | To set an HTML table -- | To set an HTML table
...@@ -151,21 +153,33 @@ setHtmlTable :: PhyloNode -> H.Label ...@@ -151,21 +153,33 @@ setHtmlTable :: PhyloNode -> H.Label
setHtmlTable pn = H.Table H.HTable setHtmlTable pn = H.Table H.HTable
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft] { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)] , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header] <> (if isNothing $ pn ^. pn_ngrams , H.tableRows = [header]
then [] <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
else map ngramsToRow $ splitEvery 4 $ zip (fromJust $ pn ^. pn_ngrams) dynamics) } <> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (fromJust $ pn ^. pn_ngrams) $ zip dynamics inclusion) }
where where
-------------------------------------- --------------------------------------
ngramsToRow :: [(Ngrams,Double)] -> H.Row ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,d) -> H.LabelCell [H.BAlign H.HLeft,colorFromDynamics d] ngramsToRow ns = H.Cells $ map (\(n,(d,_)) -> H.LabelCell [H.Align H.HLeft,colorFromDynamics d]
$ H.Text [H.Str $ fromStrict n]) ns $ H.Text [H.Str $ fromStrict n]) ns
-------------------------------------- --------------------------------------
inclusion :: [Double]
inclusion = (pn ^. pn_metrics) ! "inclusion"
--------------------------------------
dynamics :: [Double] dynamics :: [Double]
dynamics = (pn ^. pn_metrics) ! "dynamics" dynamics = (pn ^. pn_metrics) ! "dynamics"
-------------------------------------- --------------------------------------
header :: H.Row header :: H.Row
header = H.Cells [H.LabelCell [getGroupDynamic dynamics] header = H.Cells [H.LabelCell [getGroupDynamic dynamics]
$ H.Text [H.Str $ (fromStrict . T.toUpper) $ pn ^. pn_label]] $ H.Text [H.Str $ (((fromStrict . T.toUpper) $ pn ^. pn_label)
<> (fromStrict " ( ")
<> (pack $ show (fst $ getNodePeriod pn))
<> (fromStrict " , ")
<> (pack $ show (snd $ getNodePeriod pn))
<> (fromStrict " ) "))]]
-------------------------------------- --------------------------------------
......
...@@ -47,10 +47,22 @@ branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b ...@@ -47,10 +47,22 @@ branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
$ getNodesInBranches v $ getNodesInBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchBirth :: PhyloView -> PhyloView
branchBirth v = foldl (\v' b -> let bId = (fst . (head' "branchBirth")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "birth" (fromIntegral $ fst $ head' "branchAge" prds) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To process a list of Metrics to a PhyloView -- | To process a list of Metrics to a PhyloView
processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v' BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found" -- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms ) v ms
......
...@@ -36,10 +36,22 @@ sortBranchByAge o v = v & pv_branches %~ f ...@@ -36,10 +36,22 @@ sortBranchByAge o v = v & pv_branches %~ f
Desc -> reverse $ sortOn (getBranchMeta "age") xs Desc -> reverse $ sortOn (getBranchMeta "age") xs
-------------------------------------- --------------------------------------
-- | To sort a PhyloView by Birth date of a branch
sortBranchByBirth :: Order -> PhyloView -> PhyloView
sortBranchByBirth o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "birth") xs
Desc -> reverse $ sortOn (getBranchMeta "birth") xs
--------------------------------------
-- | To process a Sort to a PhyloView -- | To process a Sort to a PhyloView
processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView
processSort s _p v = case s of processSort s _p v = case s of
Nothing -> v Nothing -> v
Just s' -> case fst s' of Just s' -> case fst s' of
ByBranchAge -> sortBranchByAge (snd s') v ByBranchAge -> sortBranchByAge (snd s') v
ByBranchBirth -> sortBranchByBirth (snd s') v
--_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found" --_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found"
...@@ -18,16 +18,18 @@ module Gargantext.Viz.Phylo.View.Taggers ...@@ -18,16 +18,18 @@ module Gargantext.Viz.Phylo.View.Taggers
where where
import Control.Lens hiding (makeLenses, both, Level) import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take) import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take, union, (\\))
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Map (Map, (!)) import Data.Map (Map, (!), empty, unionWith)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.Metrics
import qualified Data.Map as Map import qualified Data.Map as Map
import Control.Parallel.Strategies
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
...@@ -82,26 +84,65 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v ...@@ -82,26 +84,65 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getNodesByBranches v $ getNodesByBranches v
getNthMostMeta :: Int -> Text -> PhyloGroup -> [Int] getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta g = map (\(idx,_) -> (getGroupNgrams g !! idx)) getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth $ take nth
$ sortOn snd $ zip [0..] $ reverse
$ (g ^. phylo_groupNgramsMeta) ! meta $ sortOn snd $ zip [0..] meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes -- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes nodeLabelCooc v thr p = over (pv_nodes
. traverse) . traverse)
(\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p (\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta thr "coverage" g lbl = ngramsToLabel (getFoundationsRoots p) $ mostOccNgrams thr g
in n & pn_label .~ lbl) v in n & pn_label .~ lbl) v
-- | To set the label of a PhyloNode as the nth most inclusives terms of its PhyloNodes
nodeLabelInc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "inclusion" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p)
$ getNthMostMeta thr ((g ^. phylo_groupNgramsMeta) ! "inclusion") (getGroupNgrams g)
in n & pn_label .~ lbl) v
nodeLabelInc' :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc' v nth p = over (pv_nodes
. traverse)
(\pn -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((pn ^. pn_metrics) ! "inclusion")
$ zip ((pn ^. pn_metrics) ! "dynamics") (pn ^. pn_idx)
in pn & pn_label .~ lbl) v
branchPeakInc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakInc v nth p =
let labels = map (\(id,nodes) ->
let cooc = foldl (\mem pn -> unionWith (+) mem (pn ^. pn_cooc)) empty nodes
ngrams = sort $ foldl (\mem pn -> union mem (pn ^. pn_idx)) [] nodes
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta nth inc ngrams
in (id, lbl))
$ getNodesByBranches v
labels' = labels `using` parList rdeepseq
in foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v labels'
-- | To process a sorted list of Taggers to a PhyloView -- | To process a sorted list of Taggers to a PhyloView
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p BranchPeakFreq -> branchPeakFreq v' 2 p
-- BranchPeakFreq -> branchPeakCooc v' 3 p BranchPeakCooc -> branchPeakCooc v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p BranchPeakInc -> branchPeakInc v' 2 p
_ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts GroupLabelInc -> nodeLabelInc v' 2 p
GroupLabelIncDyn -> nodeLabelInc' v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p) v ts
...@@ -73,6 +73,7 @@ groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g ...@@ -73,6 +73,7 @@ groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g
then Just (ngramsToText ns idxs) then Just (ngramsToText ns idxs)
else Nothing) else Nothing)
(g ^. phylo_groupNgramsMeta) (g ^. phylo_groupNgramsMeta)
(g ^. phylo_groupCooc)
(if (not isR) (if (not isR)
then Just (getGroupLevelParentsId g) then Just (getGroupLevelParentsId g)
else Nothing) else Nothing)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment