Commit 87a8bd2c authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev-merge

parents ed3661a1 36c913d9
......@@ -22,8 +22,11 @@ Phylo binaries
module Main where
import System.Directory (doesFileExist)
import Data.Aeson
import Data.Text (Text, unwords)
import Data.List ((++))
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude
......@@ -42,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node
import Data.Maybe
import qualified Data.Map as DM
import qualified Data.Vector as DV
import qualified Data.List as DL
......@@ -62,6 +62,7 @@ import qualified Data.ByteString.Lazy as L
type ListPath = FilePath
type FisPath = FilePath
type CorpusPath = FilePath
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int
......@@ -70,13 +71,18 @@ data Conf =
Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType
, listPath :: ListPath
, fisPath :: FilePath
, outputPath :: FilePath
, phyloName :: Text
, limit :: Limit
, timeGrain :: Int
, timeStep :: Int
, timeFrame :: Int
, timeFrameTh :: Double
, timeTh :: Double
, timeSens :: Double
, reBranchThr :: Double
, reBranchNth :: Int
, clusterTh :: Double
, clusterSens :: Double
, phyloLevel :: Int
......@@ -92,6 +98,11 @@ instance ToJSON Conf
instance FromJSON CorpusType
instance ToJSON CorpusType
decoder :: P.Either a b -> b
decoder (P.Left _) = P.error "Error"
decoder (P.Right x) = x
-- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path
......@@ -115,7 +126,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- . DV.reverse
. DV.take limit
-- . DV.reverse
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> CSV.readFile csv
......@@ -146,6 +159,25 @@ parse format limit path l = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- | To parse an existing Fis file
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
parseFis path name grain step support clique = do
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
if fisExists
then do
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
case fisJson of
P.Left err -> do
putStrLn err
pure []
P.Right fis -> pure fis
else pure []
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
--------------
-- | Main | --
--------------
......@@ -166,17 +198,28 @@ main = do
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList
let phylo = toPhylo query corpus roots termList mFis
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo
......
......@@ -106,6 +106,7 @@ library:
- crawlerIsidore
- crawlerHAL
- data-time-segment
- deepseq
- directory
- duckling
- exceptions
......@@ -143,6 +144,7 @@ library:
- natural-transformation
- opaleye
- pandoc
- parallel
- parsec
- patches-class
- patches-map
......@@ -268,6 +270,7 @@ executables:
- base
- bytestring
- containers
- directory
- gargantext
- vector
- parallel
......
......@@ -22,7 +22,7 @@ one 8, e54847.
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
......@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Text.Context (TermList)
import Gargantext.Prelude
import Control.DeepSeq
--------------------
-- | PhyloParam | --
--------------------
......@@ -77,6 +79,9 @@ data Phylo =
Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_fis :: Map (Date,Date) [PhyloFis]
, _phylo_param :: PhyloParam
}
deriving (Generic, Show, Eq)
......@@ -150,6 +155,7 @@ data PhyloGroup =
, _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
......@@ -157,7 +163,9 @@ data PhyloGroup =
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
}
deriving (Generic, Show, Eq, Ord)
deriving (Generic, NFData, Show, Eq, Ord)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
......@@ -199,8 +207,8 @@ type Support = Int
data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
} deriving (Show)
, _phyloFis_period :: (Date,Date)
} deriving (Generic,Show,Eq)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
......@@ -343,6 +351,11 @@ data PhyloQueryBuild = PhyloQueryBuild
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity
, _q_interTemporalMatchingFrame :: Int
, _q_interTemporalMatchingFrameTh :: Double
, _q_reBranchThr :: Double
, _q_reBranchNth :: Int
-- Last level of reconstruction
, _q_nthLevel :: Level
......
......@@ -26,6 +26,7 @@ module Gargantext.Viz.Phylo.API
--import Control.Monad.Reader (ask)
import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger
import Gargantext.API.Types
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
......@@ -104,7 +105,7 @@ postPhylo _n _lId q = do
vrs = Just ("1" :: Text)
sft = Just (Software "Gargantext" "4")
prm = initPhyloParam vrs sft (Just q)
pure (toPhyloBase q prm corpus actants termList)
pure (toPhyloBase q prm corpus actants termList empty)
------------------------------------------------------------------------
......@@ -139,6 +140,7 @@ instance ToSchema LouvainParams
instance ToSchema Metric
instance ToSchema Order
instance ToSchema Phylo
instance ToSchema PhyloFis
instance ToSchema PhyloBranch
instance ToSchema PhyloEdge
instance ToSchema PhyloGroup
......
......@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Viz.Phylo.Aggregates.Cluster
where
import Data.List (null,tail,concat,sort,intersect)
import Control.Parallel.Strategies
import Data.List (null,concat,sort,intersect,(++))
import Data.Map (Map)
import Data.Tuple (fst)
import Gargantext.Prelude
......@@ -25,7 +27,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
......@@ -44,19 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc p = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
$ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
_ -> undefined
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
......@@ -80,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
prox :: Proximity
prox = getProximity clus
......@@ -100,7 +104,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> show (lst) <> "\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
......@@ -118,9 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double
traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n"
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
) sim
......@@ -17,14 +17,17 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc
where
import Data.List (union,concat,nub)
import Data.Map (Map,elems,adjust,filterWithKey)
import Data.List (union,concat,nub,sort, sortOn)
import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map
import qualified Data.Set as Set
-- import Debug.Trace (trace)
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
......@@ -83,5 +86,49 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ Map.toList cooc
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
......@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Document
where
import Data.List (last)
import Data.Map (Map)
import Data.Map (Map,fromListWith)
import Data.Text (Text)
import Data.Tuple (fst)
import Data.Vector (Vector)
......@@ -29,23 +28,25 @@ import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Vector as Vector
import Debug.Trace (trace)
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l))
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end]
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
......@@ -54,4 +55,10 @@ parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
......@@ -17,14 +17,16 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Fis
where
import Data.List (null,concat,sort)
import Data.Map (Map, empty,elems)
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,concat,sort,(++))
import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
import Data.Tuple (fst, snd)
import Data.Set (size)
import Gargantext.Prelude
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vector
......@@ -43,12 +45,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis
......@@ -57,38 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m
in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
| null filters = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis' m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n"
$ docsToFis ds
toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
-----------------
......
......@@ -17,45 +17,135 @@ Portability : POSIX
module Gargantext.Viz.Phylo.BranchMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail)
import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems)
import Data.Tuple (fst, snd)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
graphToBranches _lvl (nodes,edges) _p = concat
$ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
$ zip [1..]
$ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
-- | Get the framing period of a branch ([[PhyloGroup]])
getBranchPeriod :: [PhyloGroup] -> (Date,Date)
getBranchPeriod gs =
let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
-- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
(getGroupsPeaks gs (getPhyloReBranchNth p) p)
(getGroupsPeaks gs' (getPhyloReBranchNth p) p))
&& ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
&& (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
) branches
-- | To build a graph using the parents and childs pointers
makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
makeGraph gs p = (gs,edges)
-- | Try to connect a focused branch to other candidate branches by finding the best pointers
reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
reBranch p branch candidates =
let newLinks = map (\branch' ->
let pointers = map (\g ->
-- define pairs of candidates groups
let pairs = listToPairs
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
-- process the matching between the pairs and the current group
in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
in if (g2 == g3)
then mem ++ [(getGroupId g,(getGroupId g2,s))]
else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
) branch
pointers' = pointers `using` parList rdeepseq
-- keep the best pointer between the focused branch and the current candidates
in head' "reBranch" $ reverse $ sortOn (snd . snd)
$ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
) candidates
newLinks' = newLinks `using` parList rdeepseq
in newLinks'
reLinkPhyloBranches :: Level -> Phylo -> Phylo
reLinkPhyloBranches lvl p =
let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
$ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
([],branches) branches
in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
where
edges :: [GroupEdge]
edges = (nub . concat)
$ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
branches :: [[PhyloGroup]]
branches = elems
$ fromListWith (++)
$ foldl' (\mem g -> case getGroupBranchId g of
Nothing -> mem
Just i -> mem ++ [(i,[g])] )
[] $ getGroupsWithLevel lvl p
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
graphToBranches groups = Map.fromList
$ concat
$ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
$ zip [1..]
$ relatedComp
$ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
bs :: [(Int,PhyloGroupId)]
bs = graphToBranches lvl graph p
setPhyloBranches lvl p = alterGroupWithLevel (\g ->
let bIdx = branches ! (getGroupId g)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
graph :: GroupGraph
graph = makeGraph (getGroupsWithLevel lvl p) p
branches :: Map PhyloGroupId Int
branches = graphToBranches (getGroupsWithLevel lvl p)
--------------------------------------
-- trace' bs = trace bs
\ No newline at end of file
......@@ -31,7 +31,7 @@ module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.Text (Text)
import Data.List ((++), last)
import Data.Map (Map)
import Data.Map (Map,empty)
import Data.Tuple (fst)
import Data.Tuple.Extra
import Data.Vector (Vector)
......@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker
......@@ -55,7 +56,7 @@ import qualified Data.List as List
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
......@@ -77,7 +78,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
--------------------------------------------------
......@@ -86,7 +87,7 @@ phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,G
phyloFromQuery :: Phylo
phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList
phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList empty
-- | To do : create a request handler and a query parser
queryParser :: [Char] -> PhyloQueryBuild
......@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.13 0)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
......@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
----------------------------------
......@@ -204,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFis True 1 (filterFisByClique)
$ filterFisByNested
$ filterFis True 1 (filterFisBySupport) (docsToFis phyloDocs)
$ filterFis True 1 (filterFisBySupport) (getPhyloFis phylo')
phylo' :: Phylo
phylo' = docsToFis' phyloDocs phylo
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
----------------------------------------
......@@ -226,7 +230,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) defaultPhyloParam
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc empty defaultPhyloParam
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double
nbDocs = countDocs corpus
periods :: [(Date,Date)]
periods = initPeriods 5 3
......
......@@ -19,6 +19,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.LevelMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last)
import Data.Map (Map, (!), empty, singleton)
......@@ -32,6 +33,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS
......@@ -60,7 +62,10 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
toPhyloGroups lvl (d,d') l m p =
let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
clusters' = clusters `using` parList rdeepseq
in clusters'
--------------------------------------
......@@ -73,7 +78,10 @@ instance PhyloLevelMaker PhyloFis
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
toPhyloGroups lvl (d,d') l _ p =
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
groups' = groups `using` parList rdeepseq
in groups'
--------------------------------------
......@@ -86,18 +94,25 @@ instance PhyloLevelMaker Document
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
$ zip [1..]
toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
$ (nub . concat)
$ map text l
--------------------------------------
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
ascLink desLink [] childs
where
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
......@@ -107,7 +122,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] childs
where
--------------------------------------
ngrams :: [Int]
......@@ -115,12 +132,16 @@ cliqueToGroup prd lvl idx lbl fis p =
$ Set.toList
$ getClique fis
--------------------------------------
childs :: [Pointer]
childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
......@@ -130,7 +151,7 @@ toPhyloLevel lvl m p = alterPhyloPeriods
in over (phylo_periodLevels)
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
......@@ -141,15 +162,16 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$ traceTranspose (lvl + 1)
$ transposePeriodLinks (lvl + 1)
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(phyloToClusters lvl clus p) p
(clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
clusters = phyloToClusters lvl clus p
--------------------------------------
lvl :: Level
lvl = getLastLevel p
......@@ -157,21 +179,26 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams k s t) -> traceBranches 1
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceReBranches 1
-- $ reLinkPhyloBranches 1
$ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhylo1
$ setLevelLinks (0,1)
$ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p
$ addPhyloLevel 1 phyloFis phylo'
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = toPhyloFis d k s t metrics filters
phyloFis = toPhyloFis' (getPhyloFis phylo') k s t
--------------------------------------
phylo' :: Phylo
phylo' = docsToFis' d p
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
......@@ -184,31 +211,37 @@ toPhylo0 d p = addPhyloLevel 0 d p
class PhyloMaker corpus
where
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Phylo
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
instance PhyloMaker [(Date, Text)]
where
--------------------------------------
toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
--------------------------------------
--------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p
toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList
......@@ -224,24 +257,30 @@ instance PhyloMaker [(Date, Text)]
instance PhyloMaker [Document]
where
--------------------------------------
toPhylo q c roots termList = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
toPhylo q c roots termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList fis
--------------------------------------
--------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p
toPhyloBase q p c roots termList fis = initPhyloBase periods foundations nbDocs cooc fis p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList
......@@ -259,6 +298,25 @@ instance PhyloMaker [Document]
-----------------
tracePhylo0 :: Phylo -> Phylo
tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
tracePhylo1 :: Phylo -> Phylo
tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
traceTranspose :: Level -> Phylo -> Phylo
traceTranspose lvl p = trace ("----\nTranspose "
<> show (length $ getGroupsWithLevel lvl p) <> " groups in Phylo "
<> show (lvl) <> "\n") p
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from "
......@@ -286,6 +344,23 @@ traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temp
--------------------------------------
traceReBranches :: Level -> Phylo -> Phylo
traceReBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " after relinking :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
......
......@@ -17,15 +17,15 @@ Portability : POSIX
module Gargantext.Viz.Phylo.LinkMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, nub, groupBy, union, inits, scanl, find)
import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith)
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import qualified Data.Map as Map
......@@ -34,71 +34,40 @@ import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
------------------------------------------------------------------------
-- | Make links from Level to Level
-- | To choose a LevelLink strategy based an a given Level
shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
shouldLink (lvl,_lvl) g g'
| lvl <= 1 = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
| lvl > 1 = elem (getGroupId g) (getGroupLevelChildsId g')
| otherwise = panic ("[ERR][Viz.Phylo.LinkMaker.shouldLink] Level not defined")
-----------------------------
-- | From Level to level | --
-----------------------------
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups (lvl,lvl') current targets
| lvl < lvl' = setLevelParents current
| lvl > lvl' = setLevelChilds current
| otherwise = current
linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
where
--------------------------------------
setLevelChilds :: PhyloGroup -> PhyloGroup
setLevelChilds = over (phylo_groupLevelChilds) addPointers
--------------------------------------
setLevelParents :: PhyloGroup -> PhyloGroup
setLevelParents = over (phylo_groupLevelParents) addPointers
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target ->
if shouldLink (lvl,lvl') current target
if (elem (getGroupId current) (getGroupLevelChildsId target))
then Just ((getGroupId target),1)
else Nothing) targets
--------------------------------------
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLevel g == lvl
then linkGroupToGroups (lvl,lvl') g (filterCandidates g
$ filter (\g' -> getGroupPeriod g' == getGroupPeriod g) gs')
else g) gs) p
where
--------------------------------------
gs' :: [PhyloGroup]
gs' = getGroupsWithLevel lvl' p
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period
setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
$ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 cooc = case prox of
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
-------------------------------
-- | From Period to Period | --
-------------------------------
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods :: Filiation -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to' id l = case to' of
Descendant -> (tail . snd) next
Ascendant -> (reverse . fst) next
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to' limit id l = case to' of
Descendant -> take limit $ (tail . snd) next
Ascendant -> take limit $ (reverse . fst) next
_ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
where
--------------------------------------
......@@ -112,35 +81,76 @@ getNextPeriods to' id l = case to' of
--------------------------------------
-- | To find the best candidates regarding a given proximity
findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates' fil depth limit prox prds gs g p
| depth > limit || null next = ([],[])
| (not . null) bestScores = (take 2 bestScores, map snd scores)
| otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g p
-- | To get the number of docs produced during a list of periods
periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
periodsToNbDocs prds phylo = sum $ elems
$ restrictKeys (phylo ^. phylo_docsByYears)
$ periodsToYears prds
-- | To process a given Proximity
processProximity :: Proximity -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
Hamming (HammingParams _) -> hamming cooc cooc'
_ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity :: Double -> Proximity -> Bool
filterProximity score prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
|| ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
$ listToPairs
$ filter (\g' -> (elem (getGroupPeriod g') prds)
&& ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
&& (((last' "makePairs" prds) == (getGroupPeriod g))
||((matchWithPairs g (g,g') p) >= (getPhyloMatchingFrameTh p))))
$ getGroupsWithLevel (getGroupLevel g) p
matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
matchWithPairs g1 (g2,g3) p =
let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
cooc = if (g2 == g3)
then getGroupCooc g2
else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
ngrams = if (g2 == g3)
then getGroupNgrams g2
else union (getGroupNgrams g2) (getGroupNgrams g3)
in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of
Nothing -> []
Just pts -> head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
next :: [PhyloPeriodId]
next = take depth prds
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getCooc next p
--------------------------------------
candidates :: [PhyloGroup]
candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\g' -> applyProximity prox g g' cooc) candidates
--------------------------------------
bestScores :: [(PhyloGroupId, Double)]
bestScores = reverse
$ sortOn snd
$ filter (\(_id,score) -> case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
) scores
--------------------------------------
pointers :: Maybe [Pointer]
pointers = find (not . null)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame ->
let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
$ concat
$ map (\(t,t') ->
let proxi = matchWithPairs g (t,t') p
in
if (t == t')
then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods
--------------------------------------
-- | To add some Pointer to a PhyloGroup
......@@ -154,74 +164,99 @@ addPointers' fil pts g = g & case fil of
-- | To update a list of phyloGroups with some Pointers
updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
then addPointers' fil (m ! (getGroupId g)) g
else g ) gs) p
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
-- | To apply the intertemporal matching to Phylo at a given level
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) scores
$ updateGroups fil lvl pointers p
-- | a init avec la [[head groups]] et la tail groups
toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
toBranches mem gs
| null gs = mem
| otherwise = toBranches mem' $ tail gs
where
--------------------------------------
pointers :: Map PhyloGroupId [Pointer]
pointers = Map.fromList $ map (\(id,x) -> (id,fst x)) candidates
--------------------------------------
scores :: [Double]
scores = sort $ concat $ map (snd . snd) candidates
--------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
mem' :: [[PhyloGroup]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
withHead :: [[PhyloGroup]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat $ map getGroupNgrams gs')
(getGroupNgrams $ (head' "toBranches" gs))
) mem
--------------------------------------
prds :: [PhyloPeriodId]
prds = getPhyloPeriods p
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
toLevelUp :: [Pointer] -> Phylo -> [Pointer]
toLevelUp lst p = Map.toList
$ map (\ws -> maximum ws)
$ fromListWith (++) [(id, [w]) | (id, w) <- pointers]
-- | To process an intertemporal matching task to a Phylo at a given level
-- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
where
--------------------------------------
pointers :: [Pointer]
pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
pointers :: [(PhyloGroupId,[Pointer])]
pointers =
let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
in (getGroupId g, phyloGroupMatching periods g p)) groups
pts' = pts `using` parList rdeepseq
in pts'
--------------------------------------
groups :: [PhyloGroup]
groups = getGroupsWithLevel lvl p
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterGroupWithLevel
(\g ->
transposePeriodLinks lvl p = alterPhyloGroups
(\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
then
let groups = map (\g -> g & phylo_groupPeriodParents .~ (trackPointers (reduceGroups g lvlGroups)
$ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers (reduceGroups g lvlGroups)
$ g ^. phylo_groupPeriodChilds )) gs
groups' = groups `using` parList rdeepseq
in groups'
else gs
) p
where
--------------------------------------
let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p
ascLink = toLevelUp (concat $ map getGroupPeriodParents childs) p
desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p
-- | find an other way to find the group from the id
trackPointers :: Map PhyloGroupId PhyloGroup -> [Pointer] -> [Pointer]
trackPointers m pts = Map.toList
$ fromListWith (\w w' -> max w w')
$ map (\(id,_w) -> (getGroupLevelParentId $ m ! id,_w)) pts
--------------------------------------
reduceGroups :: PhyloGroup -> [PhyloGroup] -> Map PhyloGroupId PhyloGroup
reduceGroups g gs = Map.fromList
$ map (\g' -> (getGroupId g',g'))
$ filter (\g' -> ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))) gs
--------------------------------------
in g & phylo_groupPeriodParents %~ (++ ascLink)
& phylo_groupPeriodChilds %~ (++ desLink)
lvlGroups :: [PhyloGroup]
lvlGroups = getGroupsWithLevel (lvl - 1) p
--------------------------------------
) lvl p
----------------
-- | Tracer | --
----------------
traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
......@@ -230,3 +265,8 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
<> "with sizes : " <> show (map length bs) <> "\n") bs
......@@ -18,36 +18,23 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (last,concat,null,nub,(++),init,tail,elemIndex,groupBy,(!!))
import Data.List (concat,null,nub,(++),elemIndex,groupBy,(!!), (\\), union, intersect)
import Data.Map (fromList,mapKeys)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp :: Int -> PhyloGroup -> GroupGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp idx curr (nodes,edges) next memo
| null nodes' && null next' = memo'
| (not . null) next' = relatedComp idx (head' "relatedComp1" next' ) (nodes' ,edges) (tail next') memo'
| otherwise = relatedComp (idx + 1) (head' "relatedComp2" nodes') (tail nodes',edges) [] memo'
where
--------------------------------------
memo' :: [[PhyloGroup]]
memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
--------------------------------------
-- import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
......
......@@ -17,33 +17,81 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity
where
import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Data.List (null,union,intersect)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,filterWithKey)
import Gargantext.Prelude
import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = trace ("==0") $ (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = trace (">0") $ (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
-- import Debug.Trace (trace)
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- -- | To process WeighedLogJaccard distance between to coocurency matrix
-- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
-- weightedLogJaccard sens cooc cooc' nbDocs
-- | null union' = 0
-- | union' == inter' = 1
-- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
-- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
-- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
-- where
-- --------------------------------------
-- keysInter :: [Int]
-- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
-- --------------------------------------
-- keysUnion :: [Int]
-- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
-- --------------------------------------
-- wInter :: Map (Int,Int) Double
-- wInter = map (/nbDocs) inter'
-- --------------------------------------
-- wUnion :: Map (Int,Int) Double
-- wUnion = map (/nbDocs) union'
-- --------------------------------------
-- inter' :: Map (Int, Int) Double
-- inter' = intersectionWith (+) cooc cooc'
-- --------------------------------------
-- union' :: Map (Int, Int) Double
-- union' = unionWith (+) cooc cooc'
-- --------------------------------------
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To get the diagonal of a matrix
toDiago :: Map (Int, Int) Double -> [Double]
toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
| null gInter = 0
| gInter == gUnion = 1
| sens == 0 = jaccard gInter gUnion
| sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
| otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
where
--------------------------------------
gInter :: [Int]
gInter = intersect ngrams ngrams'
--------------------------------------
gUnion :: [Int]
gUnion = union ngrams ngrams'
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
......
......@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level, Empty)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!))
import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set)
import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra
......@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
listToEqualCombi :: Eq a => [a] -> [(a,a)]
listToEqualCombi l = [(x,y) | x <- l, y <- l, x == y]
listToPairs :: Eq a => [a] -> [(a,a)]
listToPairs l = (listToEqualCombi l) ++ (listToUnDirectedCombi l)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
......@@ -160,8 +167,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> PhyloParam -> Phylo
initPhyloBase pds fds prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) prm
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> Map (Date,Date) [PhyloFis] -> PhyloParam -> Phylo
initPhyloBase pds fds nbDocs cooc fis prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc fis prm
-- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
......@@ -175,6 +182,41 @@ getLastLevel p = (last . sort)
. traverse
. phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrameTh :: Phylo -> Double
getPhyloMatchingFrameTh p = _q_interTemporalMatchingFrameTh $ _phyloParam_query $ getPhyloParams p
getPhyloProximity :: Phylo -> Proximity
getPhyloProximity p = _q_interTemporalMatching $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchThr :: Phylo -> Double
getPhyloReBranchThr p = _q_reBranchThr $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchNth :: Phylo -> Int
getPhyloReBranchNth p = _q_reBranchNth $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis
--------------------
-- | PhyloRoots | --
......@@ -194,6 +236,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
--------------------
-- | PhyloGroup | --
--------------------
......@@ -209,7 +256,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse
) (\g -> if getGroupLevel g == lvl
then f g
else g ) p
else g ) p
-- | To alter each list of PhyloGroups following a given function
......@@ -242,6 +289,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId
......@@ -344,13 +395,19 @@ getGroups = view ( phylo_periods
)
-- | To get all PhyloGroups matching a list of PhyloGroupIds in a Phylo
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
-- | To get all PhyloGroups matching a list of PhyloGoupIds in a Phylo
-- getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
-- getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
-- | To get a PhyloGroup matching a PhyloGroupId in a Phylo
getGroupFromId :: PhyloGroupId -> Phylo -> PhyloGroup
getGroupFromId id p = (head' "getGroupFromId") $ getGroupsFromIds [id] p
getGroupFromId id p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in groups ! id
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in elems $ restrictKeys groups (Set.fromList ids)
-- | To get the corresponding list of PhyloGroups from a list of PhyloNodes
......@@ -380,10 +437,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx)
lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams)
idxs
(Map.empty)
Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
---------------------
......@@ -418,6 +494,12 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
--------------------
-- | PhyloLevel | --
--------------------
......@@ -464,14 +546,14 @@ setPhyloLevelId lvl' (PhyloLevel (id, _lvl) groups)
getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique
-- | To get the metrics of a PhyloFis
getFisMetrics :: PhyloFis -> Map (Int,Int) (Map Text [Double])
getFisMetrics = _phyloFis_metrics
-- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support
-- | To get the period of a PhyloFis
getFisPeriod :: PhyloFis -> (Date,Date)
getFisPeriod = _phyloFis_period
----------------------------
-- | PhyloNodes & Edges | --
......@@ -737,11 +819,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Double -> Maybe Double -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' nthLevel nthCluster
(def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
......@@ -794,7 +875,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
......@@ -157,7 +157,9 @@ setDotNode pn = node (toNodeDotId $ pn ^. pn_id)
-- | To set an Edge
setDotEdge :: PhyloEdge -> Dot DotId
setDotEdge pe = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]]
setDotEdge pe
| pe ^. pe_weight == 100 = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Red]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]]
-- | To set a Period Edge
......
......@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min'))
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
--------------------------------------
......
......@@ -26,8 +26,9 @@ import Data.Map (Map)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.BranchMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
......@@ -48,14 +49,15 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> Phylo -> PhyloGroup -> [Int]
mostOccNgrams thr p g = (nub . concat )
$ map (\((f,s),_d) -> [f,s])
$ take (thr `div` 2)
$ reverse $ sortOn snd $ Map.toList cooc
mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams nth g = (nub . concat)
$ map (\((f,s),_d) -> [f,s])
$ take nth
$ reverse $ sortOn snd
$ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getSubCooc (getGroupNgrams g) $ getCooc [getGroupPeriod g] p
cooc = getGroupCooc g
-- | To alter the peak of a PhyloBranch
......@@ -74,13 +76,18 @@ branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getGroupsFromNodes ns p))
$ getNodesByBranches v
branchPeakCooc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, ngramsToLabel (getFoundationsRoots p) (getGroupsPeaks (getGroupsFromNodes ns p) nth p) ) )
$ getNodesByBranches v
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ mostOccNgrams thr p
$ mostOccNgrams thr
$ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
in n & pn_label .~ lbl) v
......@@ -89,6 +96,7 @@ nodeLabelCooc v thr p = over (pv_nodes
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
-- BranchPeakFreq -> branchPeakCooc v' 3 p
GroupLabelCooc -> nodeLabelCooc v' 2 p
_ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts
......@@ -153,19 +153,6 @@ toPhyloView q p = traceView
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-----------------
-- | Taggers | --
-----------------
......
......@@ -43,6 +43,7 @@ extra-deps:
- KMP-0.1.0.2
- accelerate-1.2.0.0
- aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4
- duckling-0.1.3.0
- full-text-search-0.2.1.4
- fullstop-0.1.4
......@@ -59,3 +60,4 @@ extra-deps:
- stemmer-0.5.2
- time-units-1.0.0
- validity-0.9.0.0 # patches-{map,class}
- directory-1.3.1.5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment