Commit ff80ee2f authored by qlobbe's avatar qlobbe

fix temporalMatching

parent 38d3a9cd
Pipeline #415 failed with stage
......@@ -77,6 +77,8 @@ data Phylo =
Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_param :: PhyloParam
}
deriving (Generic, Show, Eq)
......@@ -150,6 +152,7 @@ data PhyloGroup =
, _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
......
......@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
......@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc = case prox of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard' sens (getGroupNgrams x) (getGroupNgrams y) cooc))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y) nbDocs))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
......@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods
$ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
--------------------------------------
prox :: Proximity
prox = getProximity clus
......
......@@ -17,8 +17,10 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc
where
import Data.List (union,concat,nub)
import Data.Map (Map,elems,adjust,filterWithKey)
import Data.List (union,concat,nub,sort)
import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
......@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
......@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Document
where
import Data.List (last)
import Data.Map (Map)
import Data.Map (Map,fromListWith)
import Data.Text (Text)
import Data.Tuple (fst)
import Data.Vector (Vector)
......@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l))
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end]
......@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
......@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
......@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis
......
......@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker
......@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.3 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 0)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
......@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
----------------------------------
......@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) defaultPhyloParam
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc defaultPhyloParam
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double
nbDocs = countDocs corpus
periods :: [(Date,Date)]
periods = initPeriods 5 3
......
......@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS
......@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l
toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
--------------------------------------
......@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] (map (\g -> (getGroupId g, 1)) groups)
where
--------------------------------------
ngrams :: [Int]
......@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] []
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
where
--------------------------------------
ngrams :: [Int]
......@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p =
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] []
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
......@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$ transposePeriodLinks (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
......@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)]
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
--------------------------------------
--------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p
toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList
......@@ -240,8 +250,14 @@ instance PhyloMaker [Document]
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
--------------------------------------
--------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p
toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList
......
......@@ -18,14 +18,13 @@ module Gargantext.Viz.Phylo.LinkMaker
where
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy)
import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith)
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Aggregates.Cooc
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import qualified Data.Map as Map
......@@ -34,9 +33,9 @@ import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
------------------------------------------------------------------------
-- | Make links from Level to Level
-----------------------------
-- | From Level to level | --
-----------------------------
-- | To choose a LevelLink strategy based an a given Level
......@@ -82,17 +81,9 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 cooc = case prox of
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard' s (getGroupNgrams g1) (getGroupNgrams g2) cooc)
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
-------------------------------
-- | From Period to Period | --
-------------------------------
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
......@@ -113,35 +104,57 @@ getNextPeriods to' id l = case to' of
--------------------------------------
-- | To find the best candidates regarding a given proximity
findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates' fil depth limit prox prds gs g p
| depth > limit || null next = ([],[])
| (not . null) bestScores = (take 2 bestScores, map snd scores)
| otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g p
where
-- | To get the number of docs produced during a list of periods
periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
periodsToNbDocs prds phylo = sum $ elems
$ restrictKeys (phylo ^. phylo_docsByYears)
$ periodsToYears prds
-- | To process a given Proximity
processProximity :: Proximity -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
processProximity proximity cooc cooc' nbDocs = case proximity of
WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens cooc cooc' nbDocs
Hamming (HammingParams _) -> hamming cooc cooc'
_ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> [(Date,Date)] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates filiation depth limit proximity periods candidates g1 phylo
| depth > limit || null nextPeriods = ([],[])
| (not . null) pointers = (head' "findBestCandidates" $ groupBy (\x y -> snd x == snd y) pointers
,map snd similarities)
| otherwise = findBestCandidates filiation (depth + 1) limit proximity periods candidates g1 phylo
where
--------------------------------------
pointers :: [(PhyloGroupId, Double)]
pointers = reverse $ sortOn snd $ filter (\(_,score) -> case proximity of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
) similarities
--------------------------------------
next :: [PhyloPeriodId]
next = take depth prds
similarities :: [(PhyloGroupId, Double)]
similarities = concat $ map (\(g2,g3) -> let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
cooc2 = getGroupCooc g2
cooc3 = getGroupCooc g3
score = processProximity proximity cooc1 (unionWith (+) cooc2 cooc3) nbDocs
in nub $ [(getGroupId g2,score),(getGroupId g3,score)]) pairsOfCandidates
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getCooc next p
pairsOfCandidates :: [(PhyloGroup,PhyloGroup)]
pairsOfCandidates = listToFullCombi $ filter (\g -> elem (getGroupPeriod g) nextPeriods) candidates
--------------------------------------
candidates :: [PhyloGroup]
candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
cooc1 :: Map (Int,Int) Double
cooc1 = getGroupCooc g1
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\g' -> applyProximity prox g g' cooc) candidates
--------------------------------------
bestScores :: [(PhyloGroupId, Double)]
bestScores = reverse
$ sortOn snd
$ filter (\(_id,score) -> case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
) scores
--------------------------------------
nextPeriods :: [(Date,Date)]
nextPeriods = take depth periods
--------------------------------------
-- | To add some Pointer to a PhyloGroup
......@@ -189,56 +202,31 @@ toBranches mem gs
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches' :: [[[Int]]] -> [[Int]] -> [[[Int]]]
toBranches' mem gs
| null gs = mem
| otherwise = toBranches' mem' $ tail gs
where
--------------------------------------
mem' :: [[[Int]]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[[Int]]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat gs')
(head' "toBranches" gs)
) mem
--------------------------------------
-- | To apply the intertemporal matching to Phylo at a given level
-- | To process an intertemporal matching task to a Phylo at a given level
-- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) scores
$ updateGroups fil lvl pointers p
interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) debug $ updateGroups fil lvl pointersMap p
where
--------------------------------------
pointers :: Map PhyloGroupId [Pointer]
pointers = Map.fromList $ map (\(id,x) -> (id,fst x)) candidates
--------------------------------------
scores :: [Double]
scores = sort $ concat $ map (snd . snd) candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
debug :: [Double]
debug = sort $ concat $ map (snd . snd) pointers
--------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = concat $ map (\b -> map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) b) bs
pointersMap :: Map PhyloGroupId [Pointer]
pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
pointers :: [(PhyloGroupId,([Pointer],[Double]))]
pointers = concat
$ map (\branche ->
map (\g -> ( getGroupId g
, findBestCandidates fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
) branche ) branches
--------------------------------------
bs :: [[PhyloGroup]]
bs = tracePreBranches $ toBranches [[head' "interTempoMatching" gs]] $ tail gs
--------------------------------------
prds :: [PhyloPeriodId]
prds = getPhyloPeriods p
branches :: [[PhyloGroup]]
branches = tracePreBranches
$ toBranches [[head' "interTempoMatching" (getGroupsWithLevel lvl p)]]
$ tail (getGroupsWithLevel lvl p)
--------------------------------------
......@@ -256,6 +244,7 @@ toLevelUp lst p = Map.toList
--------------------------------------
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterGroupWithLevel
(\g ->
......@@ -269,6 +258,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel
--------------------------------------
) lvl p
----------------
-- | Tracer | --
----------------
......
......@@ -17,65 +17,40 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity
where
import Data.List (null,intersect,union)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,keys)
import Gargantext.Prelude
import Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard' :: Double -> [Int] -> [Int] -> Map (Int, Int) Double -> Double
weightedLogJaccard' s idx idx' cooc
| null idxUnion = 0
| idxUnion == idxInter = 1
| s == 0 = (fromIntegral $ length idxInter)/(fromIntegral $ length idxUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
weightedLogJaccard sens cooc cooc' nbDocs
| null union' = 0
| union' == inter' = 1
| sens == 0 = (fromIntegral $ length $ keys inter') / (fromIntegral $ length $ keys union')
| sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
| otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ getSubCooc idxInter cooc
--------------------------------------
wUnion :: [Double]
wUnion = elems $ getSubCooc idxUnion cooc
--------------------------------------
idxInter :: [Int]
idxInter = intersect idx idx'
wInter :: Map (Int,Int) Double
wInter = map (/nbDocs) inter'
--------------------------------------
idxUnion :: [Int]
idxUnion = union idx idx'
wUnion :: Map (Int,Int) Double
wUnion = map (/nbDocs) union'
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
inter' :: Map (Int, Int) Double
inter' = intersectionWith (+) cooc cooc'
--------------------------------------
union' :: Map (Int, Int) Double
union' = unionWith (+) cooc cooc'
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
......
......@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level, Empty)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!))
import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set)
import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra
......@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> PhyloParam -> Phylo
initPhyloBase pds fds prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) prm
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> PhyloParam -> Phylo
initPhyloBase pds fds nbDocs cooc prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc prm
-- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
......@@ -175,6 +175,11 @@ getLastLevel p = (last . sort)
. traverse
. phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
--------------------
-- | PhyloRoots | --
......@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
--------------------
-- | PhyloGroup | --
--------------------
......@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId
......@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx)
lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams)
idxs
(Map.empty)
Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
---------------------
......@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat) [[d,d'] | (d,d') <- periods]
--------------------
-- | PhyloLevel | --
--------------------
......
......@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min'))
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
--------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment