Commit 8aa7050d authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' of...

Merge branch 'dev-phylo' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantext into dev-merge
parents 87a8bd2c d746a99b
...@@ -17,7 +17,7 @@ Phylo binaries ...@@ -17,7 +17,7 @@ Phylo binaries
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-} {-# LANGUAGE Strict #-}
module Main where module Main where
...@@ -25,7 +25,7 @@ module Main where ...@@ -25,7 +25,7 @@ module Main where
import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import Data.Aeson import Data.Aeson
import Data.Text (Text, unwords) import Data.Text (Text, unwords, unlines)
import Data.List ((++)) import Data.List ((++))
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
...@@ -198,17 +198,13 @@ main = do ...@@ -198,17 +198,13 @@ main = do
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs") putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis") putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis let fis' = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf) let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf) (Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
...@@ -217,7 +213,7 @@ main = do ...@@ -217,7 +213,7 @@ main = do
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList mFis let phylo = toPhylo query corpus termList fis'
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo) writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
......
...@@ -153,6 +153,7 @@ data PhyloGroup = ...@@ -153,6 +153,7 @@ data PhyloGroup =
PhyloGroup { _phylo_groupId :: PhyloGroupId PhyloGroup { _phylo_groupId :: PhyloGroupId
, _phylo_groupLabel :: Text , _phylo_groupLabel :: Text
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupNgramsMeta :: Map Text [Double]
, _phylo_groupMeta :: Map Text Double , _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId , _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double , _phylo_groupCooc :: Map (Int,Int) Double
......
...@@ -32,6 +32,7 @@ import Gargantext.API.Types ...@@ -32,6 +32,7 @@ import Gargantext.API.Types
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId) import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Example import Gargantext.Viz.Phylo.Example
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.ViewMaker import Gargantext.Viz.Phylo.View.ViewMaker
...@@ -105,7 +106,7 @@ postPhylo _n _lId q = do ...@@ -105,7 +106,7 @@ postPhylo _n _lId q = do
vrs = Just ("1" :: Text) vrs = Just ("1" :: Text)
sft = Just (Software "Gargantext" "4") sft = Just (Software "Gargantext" "4")
prm = initPhyloParam vrs sft (Just q) prm = initPhyloParam vrs sft (Just q)
pure (toPhyloBase q prm corpus actants termList empty) pure (toPhyloBase q prm (parseDocs (initFoundationsRoots actants) corpus) termList empty)
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -6,41 +6,151 @@ License : AGPL + CECILL v3 ...@@ -6,41 +6,151 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
-} -}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Phylo.Aggregates.Fis module Gargantext.Viz.Phylo.Aggregates
where where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,concat,sort,(++)) import Control.Lens hiding (makeLenses, both, Level)
import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
import Data.Tuple (fst, snd) import Gargantext.Prelude hiding (elem)
import Data.Set (size) import Gargantext.Text.Context (TermList)
import Gargantext.Prelude import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map import Debug.Trace (trace)
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vector import Data.List (partition, concat, nub, elem, sort, (++), null)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, toList, elems, keys, unionWith, mapWithKey)
import Data.Set (size)
import Data.Text (Text, unwords)
import Data.Vector (Vector)
import Numeric.Statistics (percentile) import qualified Data.Vector as Vector
---------------------
-- | Foundations | --
---------------------
-- | Extract all the labels of a termList
termListToNgrams :: TermList -> [Ngrams]
termListToNgrams l = map (\(lbl,_) -> unwords lbl) l
-------------------
-- | Documents | --
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ fromList $ zip pds $ map (inPeriode f es) pds
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
-----------------
-- | Periods | --
-----------------
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end]
--------------
-- | Cooc | --
--------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToFullCombi $ fst x) l
where
--------------------------------------
idx :: [Int]
idx = nub $ concat $ map fst l
--------------------------------------
docs :: Double
docs = sum $ map snd l
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = fromList $ map (\x -> (x,0)) $ listToFullCombi idx
--------------------------------------
import Debug.Trace (trace)
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-------------
-- | Fis | --
-------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of filterFis keep thr f m = case keep of
False -> Map.map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> Map.map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- | To filter Fis with small Support
...@@ -59,23 +169,25 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m ...@@ -59,23 +169,25 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m
in filter (\fis -> elem (getClique fis) cliqueMax) l) in filter (\fis -> elem (getClique fis) cliqueMax) l)
docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo -- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis' m p = if (null $ getPhyloFis p) docsToFis :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n") then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) $ p & phylo_fis .~ mapWithKey (\k docs -> let fis = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n") else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m)) $ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis] -- | Process some filters on top of a set of Fis
toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n" refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique) $ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n" $ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested $ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n" $ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport) $ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis $ traceFis "----\nUnfiltered Fis :\n" fis
----------------- -----------------
...@@ -83,26 +195,15 @@ toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n" ...@@ -83,26 +195,15 @@ toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
----------------- -----------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis] traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n" traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
<> "support : " <> show (percentile 25 (Vector.fromList supps)) <> " (25%) " <> "support : " <> show (countSup 1 supps) <> " (>1) "
<> show (percentile 50 (Vector.fromList supps)) <> " (50%) "
<> show (percentile 75 (Vector.fromList supps)) <> " (75%) "
<> show (percentile 90 (Vector.fromList supps)) <> " (90%) "
<> show (percentile 100 (Vector.fromList supps)) <> " (100%)\n"
<> " " <> show (countSup 1 supps) <> " (>1) "
<> show (countSup 2 supps) <> " (>2) " <> show (countSup 2 supps) <> " (>2) "
<> show (countSup 3 supps) <> " (>3) " <> show (countSup 3 supps) <> " (>3) "
<> show (countSup 4 supps) <> " (>4) " <> show (countSup 4 supps) <> " (>4) "
<> show (countSup 5 supps) <> " (>5) " <> show (countSup 5 supps) <> " (>5) "
<> show (countSup 6 supps) <> " (>6)\n" <> show (countSup 6 supps) <> " (>6)\n"
<> "clique size : " <> show (percentile 25 (Vector.fromList ngrms)) <> " (25%) " <> "clique size : " <> show (countSup 1 ngrms) <> " (>1) "
<> show (percentile 50 (Vector.fromList ngrms)) <> " (50%) "
<> show (percentile 75 (Vector.fromList ngrms)) <> " (75%) "
<> show (percentile 90 (Vector.fromList ngrms)) <> " (90%) "
<> show (percentile 100 (Vector.fromList ngrms)) <> " (100%)\n"
<> " " <> show (countSup 1 ngrms) <> " (>1) "
<> show (countSup 2 ngrms) <> " (>2) " <> show (countSup 2 ngrms) <> " (>2) "
<> show (countSup 3 ngrms) <> " (>3) " <> show (countSup 3 ngrms) <> " (>3) "
<> show (countSup 4 ngrms) <> " (>4) " <> show (countSup 4 ngrms) <> " (>4) "
...@@ -118,5 +219,5 @@ traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) < ...@@ -118,5 +219,5 @@ traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <
supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
-------------------------------------- --------------------------------------
ngrms :: [Double] ngrms :: [Double]
ngrms = sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m ngrms = sort $ map (\f -> fromIntegral $ size $ _phyloFis_clique f) $ concat $ elems m
-------------------------------------- --------------------------------------
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Aggregates.Cooc
where
import Data.List (union,concat,nub,sort, sortOn)
import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map
import qualified Data.Set as Set
-- import Debug.Trace (trace)
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToDirectedCombiWith (\y -> getIdxInRoots y p) $ (Set.toList . getClique) x)
$ (concat . elems) m
where
--------------------------------------
fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . getClique) x) [] $ (concat . elems) m
--------------------------------------
docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (getSupport x)) 0 $ (concat . elems) m
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToDirectedCombiWith (\y -> getIdxInRoots y p) fisNgrams)
--------------------------------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToFullCombi $ fst x) l
where
--------------------------------------
idx :: [Int]
idx = nub $ concat $ map fst l
--------------------------------------
docs :: Double
docs = sum $ map snd l
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) $ listToFullCombi idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ Map.toList cooc
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Aggregates.Document
where
import Data.Map (Map,fromListWith)
import Data.Text (Text)
import Data.Tuple (fst)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Vector as Vector
import Debug.Trace (trace)
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end]
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
...@@ -19,13 +19,14 @@ module Gargantext.Viz.Phylo.BranchMaker ...@@ -19,13 +19,14 @@ module Gargantext.Viz.Phylo.BranchMaker
import Control.Parallel.Strategies import Control.Parallel.Strategies
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete) import Data.List (concat,nub,(++),sortOn,reverse,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems) import Data.Map (Map,(!), fromListWith, elems)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Cluster
import Gargantext.Viz.Phylo.Aggregates.Cooc import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Metrics
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
......
...@@ -15,18 +15,18 @@ Portability : POSIX ...@@ -15,18 +15,18 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Viz.Phylo.Aggregates.Cluster module Gargantext.Viz.Phylo.Cluster
where where
import Control.Parallel.Strategies import Control.Parallel.Strategies
import Data.List (null,concat,sort,intersect,(++)) import Data.Graph.Clustering.Louvain.CplusPlus
import Data.Map (Map) import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
import Data.Map (Map, fromList, mapKeys)
import Data.Tuple (fst) import Data.Tuple (fst)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -35,6 +35,40 @@ import Debug.Trace (trace) ...@@ -35,6 +35,40 @@ import Debug.Trace (trace)
import Numeric.Statistics (percentile) import Numeric.Statistics (percentile)
--------------
-- | Algo | --
--------------
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
<$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
<$> (cLouvain $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
where
--------------------------------------
idx :: PhyloGroup -> Int
idx e = case elemIndex e nodes of
Nothing -> panic "[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
Just i -> i
--------------------------------------
-----------------------
-- | Cluster Maker | --
-----------------------
-- | Optimisation to filter only relevant candidates -- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)] getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')) getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
......
...@@ -29,19 +29,16 @@ TODO: ...@@ -29,19 +29,16 @@ TODO:
module Gargantext.Viz.Phylo.Example where module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.Text (Text) import Data.Text (Text, toLower)
import Data.List ((++), last) import Data.List ((++))
import Data.Map (Map,empty) import Data.Map (Map,empty)
import Data.Tuple (fst) import Data.Tuple (fst)
import Data.Tuple.Extra
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster import Gargantext.Viz.Phylo.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
...@@ -78,7 +75,7 @@ queryViewEx = "level=3" ...@@ -78,7 +75,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
-------------------------------------------------- --------------------------------------------------
...@@ -87,7 +84,7 @@ phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,G ...@@ -87,7 +84,7 @@ phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,G
phyloFromQuery :: Phylo phyloFromQuery :: Phylo
phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList empty phyloFromQuery = toPhylo phyloQueryBuild docs termList empty
-- | To do : create a request handler and a query parser -- | To do : create a request handler and a query parser
queryParser :: [Char] -> PhyloQueryBuild queryParser :: [Char] -> PhyloQueryBuild
...@@ -105,7 +102,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -105,7 +102,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0) 3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
...@@ -155,7 +152,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1 ...@@ -155,7 +152,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster] phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1 phyloCluster = phyloToClusters 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
---------------------------------- ----------------------------------
...@@ -186,30 +183,27 @@ phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1 ...@@ -186,30 +183,27 @@ phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1
phylo1_0_1 :: Phylo phylo1_0_1 :: Phylo
phylo1_0_1 = setLevelLinks (0,1) phylo1_1_0 phylo1_0_1 = setLevelLinks (0,1) phylo1
phylo1_1_0 :: Phylo -- phylo1_1_0 :: Phylo
phylo1_1_0 = setLevelLinks (1,0) phylo1 -- phylo1_1_0 = setLevelLinks (1,0) phylo1
phylo1 :: Phylo phylo1 :: Phylo
phylo1 = addPhyloLevel (1) phyloFis phylo phylo1 = addPhyloLevel (1) phyloFis phylo'
------------------------------------------------------------------- -------------------------------------------------------------------
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them -- | STEP 5 | -- Create lists of Frequent Items Set and filter them
------------------------------------------------------------------- -------------------------------------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFis True 1 (filterFisByClique) phyloFis = refineFis (getPhyloFis phylo') True 1 1
$ filterFisByNested
$ filterFis True 1 (filterFisBySupport) (getPhyloFis phylo')
phylo' :: Phylo phylo' :: Phylo
phylo' = docsToFis' phyloDocs phylo phylo' = docsToFis phyloDocs phylo
---------------------------------------- ----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0 -- | STEP 2 | -- Init a Phylo of level 0
...@@ -221,7 +215,7 @@ phylo = addPhyloLevel 0 phyloDocs phyloBase ...@@ -221,7 +215,7 @@ phylo = addPhyloLevel 0 phyloDocs phyloBase
phyloDocs :: Map (Date, Date) [Document] phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs corpus phyloBase phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) docs
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -229,32 +223,25 @@ phyloDocs = corpusToDocs corpus phyloBase ...@@ -229,32 +223,25 @@ phyloDocs = corpusToDocs corpus phyloBase
------------------------------------------------------------------------ ------------------------------------------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc empty defaultPhyloParam phyloBase = toPhyloBase phyloQueryBuild phyloParam docs termList empty
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double phyloParam :: PhyloParam
nbDocs = countDocs corpus phyloParam = (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just phyloQueryBuild))
periods :: [(Date,Date)] docs :: [Document]
periods = initPeriods 5 3 docs = parseDocs foundationsRoots corpus
$ both fst (head' "Example" corpus,last corpus)
foundationsRoots :: Vector Ngrams foundationsRoots :: Vector Ngrams
foundationsRoots = initFoundationsRoots actants foundationsRoots = initFoundationsRoots (termListToNgrams termList)
-------------------------------------------- --------------------------------------------
-- | STEP 0 | -- Let's start with an example -- | STEP 0 | -- Let's start with an example
-------------------------------------------- --------------------------------------------
-- this is a comment
termList :: TermList termList :: TermList
termList = [] termList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams] actants :: [Ngrams]
actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV" actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
......
...@@ -27,13 +27,12 @@ import Data.Text (Text) ...@@ -27,13 +27,12 @@ import Data.Text (Text)
import Data.Tuple.Extra import Data.Tuple.Extra
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster import Gargantext.Viz.Phylo.Metrics
import Gargantext.Viz.Phylo.Aggregates.Document import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Aggregates.Fis import Gargantext.Viz.Phylo.Cluster
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -44,6 +43,11 @@ import Debug.Trace (trace) ...@@ -44,6 +43,11 @@ import Debug.Trace (trace)
import Numeric.Statistics (percentile) import Numeric.Statistics (percentile)
-------------------------
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions -- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate class PhyloLevelMaker aggregate
where where
...@@ -58,7 +62,7 @@ instance PhyloLevelMaker PhyloCluster ...@@ -58,7 +62,7 @@ instance PhyloLevelMaker PhyloCluster
-------------------------------------- --------------------------------------
-- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo -- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
addPhyloLevel lvl m p addPhyloLevel lvl m p
| lvl > 1 = toPhyloLevel lvl m p | lvl > 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
...@@ -74,7 +78,7 @@ instance PhyloLevelMaker PhyloFis ...@@ -74,7 +78,7 @@ instance PhyloLevelMaker PhyloFis
-------------------------------------- --------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo -- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
addPhyloLevel lvl m p addPhyloLevel lvl m p
| lvl == 1 = toPhyloLevel lvl m p | lvl == 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
...@@ -90,7 +94,7 @@ instance PhyloLevelMaker Document ...@@ -90,7 +94,7 @@ instance PhyloLevelMaker Document
-------------------------------------- --------------------------------------
-- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo -- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
addPhyloLevel lvl m p addPhyloLevel lvl m p
| lvl == 0 = toPhyloLevel lvl m p | lvl == 0 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
...@@ -100,32 +104,35 @@ instance PhyloLevelMaker Document ...@@ -100,32 +104,35 @@ instance PhyloLevelMaker Document
-------------------------------------- --------------------------------------
-- | To transform a Cluster into a Phylogroup -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
clusterToGroup prd lvl idx lbl groups _m p = addPhyloLevel' lvl m p = alterPhyloPeriods
PhyloGroup ((prd, lvl), idx) lbl ngrams empty (\period -> let pId = _phylo_periodId period
Nothing in over (phylo_periodLevels)
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)) (\phyloLevels ->
ascLink desLink [] childs let groups = toPhyloGroups lvl pId (m ! pId) m p
where in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
-------------------------------------- ) period) p
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups ----------------------
desLink = concat $ map getGroupPeriodChilds groups -- | toPhyloGroup | --
-------------------------------------- ----------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
--------------------------------------
-- | To transform a Clique into a PhyloGroup -- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p = cliqueToGroup prd lvl idx lbl fis p = PhyloGroup ((prd, lvl), idx) lbl ngrams
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing (getNgramsMeta cooc ngrams)
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)) -- empty
(singleton "support" (fromIntegral $ getSupport fis))
Nothing
cooc
[] [] [] childs [] [] [] childs
where where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInRoots x p) ngrams = sort $ map (\x -> getIdxInRoots x p)
...@@ -137,22 +144,62 @@ cliqueToGroup prd lvl idx lbl fis p = ...@@ -137,22 +144,62 @@ cliqueToGroup prd lvl idx lbl fis p =
-------------------------------------- --------------------------------------
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
cooc
ascLink desLink [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup -- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p)) (getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] [] [] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups ----------------------
toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo -- | toPhyloLevel | --
toPhyloLevel lvl m p = alterPhyloPeriods ----------------------
(\period -> let pId = _phylo_periodId period
in over (phylo_periodLevels)
(\phyloLevels -> -- | To reconstruct the Phylo from a set of Document to a given Level
let groups = toPhyloGroups lvl pId (m ! pId) m p toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
in phyloLevels ++ [PhyloLevel (pId, lvl) groups] toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
) period) p where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = tracePhyloN 0
$ addPhyloLevel 0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
--------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks -- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
...@@ -162,7 +209,6 @@ toNthLevel lvlMax prox clus p ...@@ -162,7 +209,6 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus | otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1) $ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
$ traceTranspose (lvl + 1)
$ transposePeriodLinks (lvl + 1) $ transposePeriodLinks (lvl + 1)
$ tracePhyloN (lvl + 1) $ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1) $ setLevelLinks (lvl, lvl + 1)
...@@ -181,116 +227,47 @@ toNthLevel lvlMax prox clus p ...@@ -181,116 +227,47 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceReBranches 1 Fis (FisParams k s t) -> traceBranches 1
-- $ reLinkPhyloBranches 1 -- $ reLinkPhyloBranches 1
$ traceBranches 1 -- $ traceBranches 1
$ setPhyloBranches 1 $ setPhyloBranches 1
$ traceTempoMatching Descendant 1 $ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1 $ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox $ interTempoMatching Ascendant 1 prox
$ tracePhylo1 $ tracePhyloN 1
$ setLevelLinks (0,1) $ setLevelLinks (0,1)
$ addPhyloLevel 1 phyloFis phylo' $ addPhyloLevel 1 phyloFis phylo'
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = toPhyloFis' (getPhyloFis phylo') k s t phyloFis = refineFis (getPhyloFis phylo') k s t
-------------------------------------- --------------------------------------
phylo' :: Phylo phylo' :: Phylo
phylo' = docsToFis' d p phylo' = docsToFis d p
-------------------------------------- --------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized" _ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
-- | To reconstruct the Level 0 of a Phylo -- | To create the base of the Phylo (foundations, periods, cooc, etc)
toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhylo0 d p = addPhyloLevel 0 d p toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
class PhyloMaker corpus
where
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
instance PhyloMaker [(Date, Text)]
where where
-------------------------------------- --------------------------------------
toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1 cooc :: Map Date (Map (Int,Int) Double)
where cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
--------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p nbDocs :: Map Date Double
where nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both fst (head' "LevelMaker" c,last c)
--------------------------------------
-------------------------------------- --------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundationsRoots p) c foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
instance PhyloMaker [Document]
where
-------------------------------------- --------------------------------------
toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1 periods :: [(Date,Date)]
where periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
-------------------------------------- $ both date (head' "LevelMaker" c,last c)
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
--------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "LevelMaker" c,last c)
--------------------------------------
--------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
----------------- -----------------
...@@ -298,25 +275,11 @@ instance PhyloMaker [Document] ...@@ -298,25 +275,11 @@ instance PhyloMaker [Document]
----------------- -----------------
tracePhylo0 :: Phylo -> Phylo
tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
tracePhylo1 :: Phylo -> Phylo
tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
tracePhyloN :: Level -> Phylo -> Phylo tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n" tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p <> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
traceTranspose :: Level -> Phylo -> Phylo
traceTranspose lvl p = trace ("----\nTranspose "
<> show (length $ getGroupsWithLevel lvl p) <> " groups in Phylo "
<> show (lvl) <> "\n") p
tracePhyloBase :: Phylo -> Phylo tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n" tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from " <> show (length $ _phylo_periods p) <> " periods from "
...@@ -329,38 +292,14 @@ tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n" ...@@ -329,38 +292,14 @@ tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n" traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length pts) <> " pointers\n" <> "count : " <> show (length pts) <> " pointers\n") p
<> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) " where
<> show (percentile 50 (VS.fromList sim)) <> " (50%) "
<> show (percentile 75 (VS.fromList sim)) <> " (75%) "
<> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
where
--------------------------------------
sim :: [Double]
sim = sort $ map snd pts
-------------------------------------- --------------------------------------
pts :: [Pointer] pts :: [Pointer]
pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
-------------------------------------- --------------------------------------
traceReBranches :: Level -> Phylo -> Phylo
traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n" traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n" <> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
......
...@@ -19,13 +19,13 @@ module Gargantext.Viz.Phylo.LinkMaker ...@@ -19,13 +19,13 @@ module Gargantext.Viz.Phylo.LinkMaker
import Control.Parallel.Strategies import Control.Parallel.Strategies
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, nub, groupBy, union, inits, scanl, find) import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, groupBy, union, inits, scanl, find)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member) import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Maybe as Maybe import qualified Data.Maybe as Maybe
import qualified Data.Map as Map import qualified Data.Map as Map
......
...@@ -6,59 +6,93 @@ License : AGPL + CECILL v3 ...@@ -6,59 +6,93 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
-} -}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Phylo.Metrics.Proximity module Gargantext.Viz.Phylo.Metrics
where where
import Data.List (null,union,intersect)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,filterWithKey)
import Gargantext.Prelude import Gargantext.Prelude
import Data.List ((\\), sortOn, concat, nub, take, union, intersect, null)
import Data.Map (Map, foldlWithKey, toList, size, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault)
import Data.Text (Text)
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
----------------
-- | Ngrams | --
----------------
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ foldlWithKey (\s (x,_) v -> if x == j
then s + v
else s ) 0 m
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / 2
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / 2
-- | Return the coverage score of a given ngram
coverage :: Map (Int, Int) Double -> [Int] -> Int -> Double
coverage m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / 2
-- | Process some metrics on top of ngrams
getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("coverage" , map (\n -> coverage m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ toList cooc
-------------------------
-- | Ngrams Dynamics | --
-------------------------
-------------------
-- | Proximity | --
-------------------
-- | Process the inverse sumLog
sumInvLog :: Double -> [Double] -> Double sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- | Process the sumLog
sumLog :: Double -> [Double] -> Double sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- -- | To process WeighedLogJaccard distance between to coocurency matrix
-- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
-- weightedLogJaccard sens cooc cooc' nbDocs
-- | null union' = 0
-- | union' == inter' = 1
-- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
-- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
-- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
-- where
-- --------------------------------------
-- keysInter :: [Int]
-- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
-- --------------------------------------
-- keysUnion :: [Int]
-- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
-- --------------------------------------
-- wInter :: Map (Int,Int) Double
-- wInter = map (/nbDocs) inter'
-- --------------------------------------
-- wUnion :: Map (Int,Int) Double
-- wUnion = map (/nbDocs) union'
-- --------------------------------------
-- inter' :: Map (Int, Int) Double
-- inter' = intersectionWith (+) cooc cooc'
-- --------------------------------------
-- union' :: Map (Int, Int) Double
-- union' = unionWith (+) cooc cooc'
-- --------------------------------------
-- | To compute a jaccard similarity between two lists -- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union') jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
...@@ -93,7 +127,6 @@ weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams' ...@@ -93,7 +127,6 @@ weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
-------------------------------------- --------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields -- | To process the Hamming distance between two PhyloGroup fields
hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2)) hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
...@@ -102,3 +135,9 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s ...@@ -102,3 +135,9 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s
inter :: Map (Int, Int) Double inter :: Map (Int, Int) Double
inter = intersection f1 f2 inter = intersection f1 f2
-------------------------------------- --------------------------------------
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Metrics.Clustering
where
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (concat,null,nub,(++),elemIndex,groupBy,(!!), (\\), union, intersect)
import Data.Map (fromList,mapKeys)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
-- import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
<$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
<$> (cLouvain $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
where
--------------------------------------
idx :: PhyloGroup -> Int
idx e = case elemIndex e nodes of
Nothing -> panic "[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
Just i -> i
--------------------------------------
...@@ -139,7 +139,7 @@ listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ] ...@@ -139,7 +139,7 @@ listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label -- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ ngramsToText ngrams l ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text -- | To transform a list of Ngrams Indexes into a list of Text
...@@ -147,6 +147,11 @@ ngramsToText :: Vector Ngrams -> [Int] -> [Text] ...@@ -147,6 +147,11 @@ ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x) -- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2 unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
...@@ -439,6 +444,7 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup ...@@ -439,6 +444,7 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
lbl lbl
idxs idxs
(Map.empty) (Map.empty)
(Map.empty)
Nothing Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p)) (getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] [] [] [] [] []
......
...@@ -18,17 +18,17 @@ module Gargantext.Viz.Phylo.View.Taggers ...@@ -18,17 +18,17 @@ module Gargantext.Viz.Phylo.View.Taggers
where where
import Control.Lens hiding (makeLenses, both, Level) import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort) import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Map (Map) import Data.Map (Map, (!))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import qualified Data.Map as Map import qualified Data.Map as Map
-- import Debug.Trace (trace) import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups -- | To get the nth most frequent Ngrams in a list of PhyloGroups
...@@ -82,14 +82,20 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v ...@@ -82,14 +82,20 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getNodesByBranches v $ getNodesByBranches v
getNthMostMeta :: Int -> Text -> PhyloGroup -> [Int]
getNthMostMeta nth meta g = map (\(idx,_) -> (getGroupNgrams g !! idx))
$ take nth
$ sortOn snd $ zip [0..]
$ (g ^. phylo_groupNgramsMeta) ! meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes -- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes nodeLabelCooc v thr p = over (pv_nodes
. traverse) . traverse)
(\n -> let lbl = ngramsToLabel (getFoundationsRoots p) (\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
$ mostOccNgrams thr lbl' = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta thr "coverage" g
$ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p in trace (show (lbl')) $ n & pn_label .~ lbl') v
in n & pn_label .~ lbl) v
-- | To process a sorted list of Taggers to a PhyloView -- | To process a sorted list of Taggers to a PhyloView
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment