Commit ff80ee2f authored by qlobbe's avatar qlobbe

fix temporalMatching

parent 38d3a9cd
Pipeline #415 failed with stage
...@@ -77,6 +77,8 @@ data Phylo = ...@@ -77,6 +77,8 @@ data Phylo =
Phylo { _phylo_duration :: (Start, End) Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations , _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod] , _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -150,6 +152,7 @@ data PhyloGroup = ...@@ -150,6 +152,7 @@ data PhyloGroup =
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double , _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId , _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
......
...@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo ...@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of ...@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity -- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge]) groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc = case prox of groupsToGraph nbDocs prox gs = case prox of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y) nbDocs))
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc))) $ getCandidates gs)
-- $ getCandidates gs) Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard' sens (getGroupNgrams x) (getGroupNgrams y) cooc))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
_ -> undefined _ -> undefined
...@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
-------------------------------------- --------------------------------------
graphs :: [([GroupNode],[GroupEdge])] graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox) graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
-------------------------------------- --------------------------------------
prox :: Proximity prox :: Proximity
prox = getProximity clus prox = getProximity clus
......
...@@ -17,8 +17,10 @@ Portability : POSIX ...@@ -17,8 +17,10 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc module Gargantext.Viz.Phylo.Aggregates.Cooc
where where
import Data.List (union,concat,nub) import Data.List (union,concat,nub,sort)
import Data.Map (Map,elems,adjust,filterWithKey) import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
...@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g) ...@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
-------------------------------------- --------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- phyloCooc :: Map (Int, Int) Double -- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1 -- phyloCooc = fisToCooc phyloFis phylo1_0_1
...@@ -17,8 +17,7 @@ Portability : POSIX ...@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Document module Gargantext.Viz.Phylo.Aggregates.Document
where where
import Data.List (last) import Data.Map (Map,fromListWith)
import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple (fst) import Data.Tuple (fst)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector ...@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector
-- | To init a list of Periods framed by a starting Date and an ending Date -- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)] initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l)) initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end] $ chunkAlong g s [start .. end]
...@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds ...@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) = inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h fst $ List.partition (\d -> f' d >= start && f' d <= end) h
-------------------------------------- --------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams -- | To parse a list of Documents by filtering on a Vector of Ngrams
...@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t) ...@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots) -> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c $ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
...@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of ...@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis] filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
......
...@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList) ...@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker import Gargantext.Viz.Phylo.LevelMaker
...@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.3 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 0) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
...@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1 ...@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster] phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1 phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
---------------------------------- ----------------------------------
...@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase ...@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) defaultPhyloParam phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc defaultPhyloParam
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double
nbDocs = countDocs corpus
periods :: [(Date,Date)] periods :: [(Date,Date)]
periods = initPeriods 5 3 periods = initPeriods 5 3
......
...@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis ...@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster ...@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
-------------------------------------- --------------------------------------
...@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document ...@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup -- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m = clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups) PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] (map (\g -> (getGroupId g, 1)) groups)
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
...@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m = ...@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup -- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p = cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
...@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p = ...@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p =
-- | To transform a list of Ngrams into a PhyloGroup -- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
...@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p ...@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus | otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1) $ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$ transposePeriodLinks (lvl + 1) $ transposePeriodLinks (lvl + 1)
$ setLevelLinks (lvl, lvl + 1) $ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) $ addPhyloLevel (lvl + 1)
...@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)] ...@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)]
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
where where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs c
-------------------------------------- --------------------------------------
foundations :: PhyloFoundations foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList foundations = PhyloFoundations (initFoundationsRoots roots) termList
...@@ -240,8 +250,14 @@ instance PhyloMaker [Document] ...@@ -240,8 +250,14 @@ instance PhyloMaker [Document]
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
where where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
-------------------------------------- --------------------------------------
foundations :: PhyloFoundations foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList foundations = PhyloFoundations (initFoundationsRoots roots) termList
......
...@@ -18,14 +18,13 @@ module Gargantext.Viz.Phylo.LinkMaker ...@@ -18,14 +18,13 @@ module Gargantext.Viz.Phylo.LinkMaker
where where
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect) import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith) import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Maybe as Maybe import qualified Data.Maybe as Maybe
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -34,9 +33,9 @@ import qualified Data.Vector.Storable as VS ...@@ -34,9 +33,9 @@ import qualified Data.Vector.Storable as VS
import Debug.Trace (trace) import Debug.Trace (trace)
import Numeric.Statistics (percentile) import Numeric.Statistics (percentile)
-----------------------------
------------------------------------------------------------------------ -- | From Level to level | --
-- | Make links from Level to Level -----------------------------
-- | To choose a LevelLink strategy based an a given Level -- | To choose a LevelLink strategy based an a given Level
...@@ -82,17 +81,9 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve ...@@ -82,17 +81,9 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-------------------------------------- --------------------------------------
------------------------------------------------------------------------ -------------------------------
-- | Make links from Period to Period -- | From Period to Period | --
-------------------------------
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 cooc = case prox of
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard' s (getGroupNgrams g1) (getGroupNgrams g2) cooc)
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId -- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
...@@ -113,35 +104,57 @@ getNextPeriods to' id l = case to' of ...@@ -113,35 +104,57 @@ getNextPeriods to' id l = case to' of
-------------------------------------- --------------------------------------
-- | To find the best candidates regarding a given proximity -- | To get the number of docs produced during a list of periods
findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double]) periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
findBestCandidates' fil depth limit prox prds gs g p periodsToNbDocs prds phylo = sum $ elems
| depth > limit || null next = ([],[]) $ restrictKeys (phylo ^. phylo_docsByYears)
| (not . null) bestScores = (take 2 bestScores, map snd scores) $ periodsToYears prds
| otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g p
where
-- | To process a given Proximity
processProximity :: Proximity -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
processProximity proximity cooc cooc' nbDocs = case proximity of
WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens cooc cooc' nbDocs
Hamming (HammingParams _) -> hamming cooc cooc'
_ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> [(Date,Date)] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates filiation depth limit proximity periods candidates g1 phylo
| depth > limit || null nextPeriods = ([],[])
| (not . null) pointers = (head' "findBestCandidates" $ groupBy (\x y -> snd x == snd y) pointers
,map snd similarities)
| otherwise = findBestCandidates filiation (depth + 1) limit proximity periods candidates g1 phylo
where
--------------------------------------
pointers :: [(PhyloGroupId, Double)]
pointers = reverse $ sortOn snd $ filter (\(_,score) -> case proximity of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
) similarities
-------------------------------------- --------------------------------------
next :: [PhyloPeriodId] similarities :: [(PhyloGroupId, Double)]
next = take depth prds similarities = concat $ map (\(g2,g3) -> let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
cooc2 = getGroupCooc g2
cooc3 = getGroupCooc g3
score = processProximity proximity cooc1 (unionWith (+) cooc2 cooc3) nbDocs
in nub $ [(getGroupId g2,score),(getGroupId g3,score)]) pairsOfCandidates
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) Double pairsOfCandidates :: [(PhyloGroup,PhyloGroup)]
cooc = getCooc next p pairsOfCandidates = listToFullCombi $ filter (\g -> elem (getGroupPeriod g) nextPeriods) candidates
-------------------------------------- --------------------------------------
candidates :: [PhyloGroup] cooc1 :: Map (Int,Int) Double
candidates = filter (\g' -> elem (getGroupPeriod g') next) gs cooc1 = getGroupCooc g1
-------------------------------------- --------------------------------------
scores :: [(PhyloGroupId, Double)] nextPeriods :: [(Date,Date)]
scores = map (\g' -> applyProximity prox g g' cooc) candidates nextPeriods = take depth periods
-------------------------------------- --------------------------------------
bestScores :: [(PhyloGroupId, Double)]
bestScores = reverse
$ sortOn snd
$ filter (\(_id,score) -> case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
) scores
--------------------------------------
-- | To add some Pointer to a PhyloGroup -- | To add some Pointer to a PhyloGroup
...@@ -189,56 +202,31 @@ toBranches mem gs ...@@ -189,56 +202,31 @@ toBranches mem gs
-------------------------------------- --------------------------------------
-- | a init avec la [[head groups]] et la tail groups -- | To process an intertemporal matching task to a Phylo at a given level
toBranches' :: [[[Int]]] -> [[Int]] -> [[[Int]]] -- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
toBranches' mem gs -- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
| null gs = mem -- | 3) update all the groups with the new pointers if they exist
| otherwise = toBranches' mem' $ tail gs
where
--------------------------------------
mem' :: [[[Int]]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[[Int]]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat gs')
(head' "toBranches" gs)
) mem
--------------------------------------
-- | To apply the intertemporal matching to Phylo at a given level
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) scores interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) debug $ updateGroups fil lvl pointersMap p
$ updateGroups fil lvl pointers p
where where
-------------------------------------- --------------------------------------
pointers :: Map PhyloGroupId [Pointer] debug :: [Double]
pointers = Map.fromList $ map (\(id,x) -> (id,fst x)) candidates debug = sort $ concat $ map (snd . snd) pointers
--------------------------------------
scores :: [Double]
scores = sort $ concat $ map (snd . snd) candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
-------------------------------------- --------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))] pointersMap :: Map PhyloGroupId [Pointer]
candidates = concat $ map (\b -> map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) b) bs pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
-------------------------------------- --------------------------------------
gs :: [PhyloGroup] pointers :: [(PhyloGroupId,([Pointer],[Double]))]
gs = getGroupsWithLevel lvl p pointers = concat
$ map (\branche ->
map (\g -> ( getGroupId g
, findBestCandidates fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
) branche ) branches
-------------------------------------- --------------------------------------
bs :: [[PhyloGroup]] branches :: [[PhyloGroup]]
bs = tracePreBranches $ toBranches [[head' "interTempoMatching" gs]] $ tail gs branches = tracePreBranches
-------------------------------------- $ toBranches [[head' "interTempoMatching" (getGroupsWithLevel lvl p)]]
prds :: [PhyloPeriodId] $ tail (getGroupsWithLevel lvl p)
prds = getPhyloPeriods p
-------------------------------------- --------------------------------------
...@@ -256,6 +244,7 @@ toLevelUp lst p = Map.toList ...@@ -256,6 +244,7 @@ toLevelUp lst p = Map.toList
-------------------------------------- --------------------------------------
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks :: Level -> Phylo -> Phylo transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterGroupWithLevel transposePeriodLinks lvl p = alterGroupWithLevel
(\g -> (\g ->
...@@ -269,6 +258,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel ...@@ -269,6 +258,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel
-------------------------------------- --------------------------------------
) lvl p ) lvl p
---------------- ----------------
-- | Tracer | -- -- | Tracer | --
---------------- ----------------
......
...@@ -17,65 +17,40 @@ Portability : POSIX ...@@ -17,65 +17,40 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity module Gargantext.Viz.Phylo.Metrics.Proximity
where where
import Data.List (null,intersect,union) import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size) import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,keys)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields sumInvLog :: Double -> [Double] -> Double
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard' :: Double -> [Int] -> [Int] -> Map (Int, Int) Double -> Double -- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard' s idx idx' cooc weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
| null idxUnion = 0 weightedLogJaccard sens cooc cooc' nbDocs
| idxUnion == idxInter = 1 | null union' = 0
| s == 0 = (fromIntegral $ length idxInter)/(fromIntegral $ length idxUnion) | union' == inter' = 1
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion) | sens == 0 = (fromIntegral $ length $ keys inter') / (fromIntegral $ length $ keys union')
| otherwise = (sumLog wInter)/(sumLog wUnion) | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
| otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
where where
-------------------------------------- --------------------------------------
wInter :: [Double] wInter :: Map (Int,Int) Double
wInter = elems $ getSubCooc idxInter cooc wInter = map (/nbDocs) inter'
--------------------------------------
wUnion :: [Double]
wUnion = elems $ getSubCooc idxUnion cooc
--------------------------------------
idxInter :: [Int]
idxInter = intersect idx idx'
-------------------------------------- --------------------------------------
idxUnion :: [Int] wUnion :: Map (Int,Int) Double
idxUnion = union idx idx' wUnion = map (/nbDocs) union'
-------------------------------------- --------------------------------------
sumInvLog :: [Double] -> Double inter' :: Map (Int, Int) Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l inter' = intersectionWith (+) cooc cooc'
--------------------------------------
union' :: Map (Int, Int) Double
union' = unionWith (+) cooc cooc'
-------------------------------------- --------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields -- | To process the Hamming distance between two PhyloGroup fields
......
...@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools ...@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where where
import Control.Lens hiding (both, Level, Empty) import Control.Lens hiding (both, Level, Empty)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy) import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe) import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!)) import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text,toLower,unwords) import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra import Data.Tuple.Extra
...@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams ...@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations -- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> PhyloParam -> Phylo initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> PhyloParam -> Phylo
initPhyloBase pds fds prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) prm initPhyloBase pds fds nbDocs cooc prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc prm
-- | To init the param of a Phylo -- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
...@@ -175,6 +175,11 @@ getLastLevel p = (last . sort) ...@@ -175,6 +175,11 @@ getLastLevel p = (last . sort)
. traverse . traverse
. phylo_periodLevels ) p . phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-------------------- --------------------
-- | PhyloRoots | -- -- | PhyloRoots | --
...@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of ...@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots" Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
-------------------- --------------------
-- | PhyloGroup | -- -- | PhyloGroup | --
-------------------- --------------------
...@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId ...@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup -- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId getGroupLevel = snd . fst . getGroupId
...@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup ...@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx) (((from', to'), lvl), idx)
lbl lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams) idxs
(Map.empty) (Map.empty)
Nothing Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] [] [] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
--------------------- ---------------------
...@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod ...@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat) [[d,d'] | (d,d') <- periods]
-------------------- --------------------
-- | PhyloLevel | -- -- | PhyloLevel | --
-------------------- --------------------
......
...@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v' ...@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where where
-------------------------------------- --------------------------------------
v' :: PhyloView v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min')) v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
-------------------------------------- --------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment