Commit 815ab543 authored by qlobbe's avatar qlobbe

add parallelism

parent 7832afe9
Pipeline #473 failed with stage
......@@ -103,6 +103,7 @@ library:
- contravariant
- crawlerPubMed
- data-time-segment
- deepseq
- directory
- duckling
- exceptions
......@@ -138,6 +139,7 @@ library:
- natural-transformation
- opaleye
- pandoc
- parallel
- parsec
- patches-class
- patches-map
......
......@@ -22,7 +22,7 @@ one 8, e54847.
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
......@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Text.Context (TermList)
import Gargantext.Prelude
import Control.DeepSeq
--------------------
-- | PhyloParam | --
--------------------
......@@ -161,7 +163,9 @@ data PhyloGroup =
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
}
deriving (Generic, Show, Eq, Ord)
deriving (Generic, NFData, Show, Eq, Ord)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
......
......@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Viz.Phylo.Aggregates.Cluster
where
import Data.List (null,tail,concat,sort,intersect)
import Control.Parallel.Strategies
import Data.List (null,concat,sort,intersect,(++))
import Data.Map (Map)
import Data.Tuple (fst)
import Gargantext.Prelude
......@@ -44,17 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs)
WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
......@@ -78,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
prox :: Proximity
prox = getProximity clus
......@@ -115,7 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
......@@ -17,6 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.BranchMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union)
import Data.Map (Map)
......@@ -82,13 +83,14 @@ findSimBranches frame thr nth p (id,gs) bs
pks = getGroupsPeaks gs nth p
--------------------------------------
findBestPointer :: Phylo -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [(PhyloGroupId,Pointer)]
findBestPointer p prox gs gs' = take 1
$ reverse
$ sortOn (snd . snd)
$ concat
$ map (\g -> let pts = findBestCandidates' prox gs' g p
in map (\pt -> (getGroupId g,pt)) pts) gs
findBestPointer p prox gs gs' =
let candidates = map (\g -> let pts = findBestCandidates' prox gs' g p
in map (\pt -> (getGroupId g,pt)) pts) gs
candidates' = candidates `using` parList rdeepseq
in take 1 $ reverse $ sortOn (snd . snd) $ concat candidates'
makeBranchLinks :: Phylo -> Proximity -> (PhyloBranchId,[PhyloGroup]) -> [(PhyloBranchId,[PhyloGroup])] -> [(PhyloGroupId,Pointer)] -> [(PhyloGroupId,Pointer)]
makeBranchLinks p prox (id,gs) bs pts
......@@ -126,34 +128,22 @@ linkPhyloBranches lvl prox p = setPhyloBranches lvl
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
graphToBranches _lvl (nodes,edges) _p = concat
$ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
$ zip [1..]
$ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
-- | To build a graph using the parents and childs pointers
makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
makeGraph gs p = (gs,edges)
where
edges :: [GroupEdge]
edges = (nub . concat)
$ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
graphToBranches :: [PhyloGroup] -> Phylo -> [(Int,PhyloGroupId)]
graphToBranches groups p = concat
$ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
$ zip [1..]
$ relatedComp
$ map (\g -> nub $ [g] ++ (getGroupParents g p) ++ (getGroupChilds g p)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
bs :: [(Int,PhyloGroupId)]
bs = graphToBranches lvl graph p
setPhyloBranches lvl p = alterGroupWithLevel (\g ->
let bIdx = (fst $ head' "branchMaker"
$ filter (\b -> snd b == getGroupId g) branches)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
graph :: GroupGraph
graph = makeGraph (getGroupsWithLevel lvl p) p
branches :: [(Int,PhyloGroupId)]
branches = graphToBranches (getGroupsWithLevel lvl p) p
--------------------------------------
......@@ -105,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 5 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
......
......@@ -19,6 +19,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.LevelMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last)
import Data.Map (Map, (!), empty, singleton)
......@@ -61,7 +62,10 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
toPhyloGroups lvl (d,d') l m p =
let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
clusters' = clusters `using` parList rdeepseq
in clusters'
--------------------------------------
......@@ -74,7 +78,10 @@ instance PhyloLevelMaker PhyloFis
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _ p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
toPhyloGroups lvl (d,d') l _ p =
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
groups' = groups `using` parList rdeepseq
in groups'
--------------------------------------
......@@ -87,8 +94,7 @@ instance PhyloLevelMaker Document
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
$ zip [1..]
toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
$ (nub . concat)
$ map text l
--------------------------------------
......@@ -100,11 +106,13 @@ clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] childs
ascLink desLink [] childs
where
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
......@@ -116,7 +124,7 @@ cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> P
cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
[] [] [] childs
where
--------------------------------------
ngrams :: [Int]
......@@ -124,6 +132,9 @@ cliqueToGroup prd lvl idx lbl fis p =
$ Set.toList
$ getClique fis
--------------------------------------
childs :: [Pointer]
childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
......@@ -152,6 +163,7 @@ toNthLevel lvlMax prox clus p
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
$ transposePeriodLinks (lvl + 1)
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(clusters) p
......@@ -169,15 +181,15 @@ toNthLevel lvlMax prox clus p
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceReBranches 1
$ linkPhyloBranches 1 prox
-- $ linkPhyloBranches 1 prox
$ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhylo1
$ setLevelLinks (0,1)
$ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis phylo'
where
--------------------------------------
......@@ -212,7 +224,7 @@ instance PhyloMaker [(Date, Text)]
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
......@@ -251,7 +263,7 @@ instance PhyloMaker [Document]
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
phylo0 = tracePhylo0 $ toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
......@@ -286,13 +298,16 @@ instance PhyloMaker [Document]
tracePhylo0 :: Phylo -> Phylo
tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n") p
tracePhylo0 p = trace ("\n---------------\n--| Phylo 0 |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel 0 p) <> " groups created \n") p
tracePhylo1 :: Phylo -> Phylo
tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n") p
tracePhylo1 p = trace ("\n---------------\n--| Phylo 1 |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel 1 p) <> " groups created \n") p
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n") p
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
tracePhyloBase :: Phylo -> Phylo
......
......@@ -17,8 +17,9 @@ Portability : POSIX
module Gargantext.Viz.Phylo.LinkMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete, intersect, nub, groupBy, union)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, nub, groupBy, union, inits, scanl, find)
import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
import Gargantext.Prelude
......@@ -38,43 +39,23 @@ import Numeric.Statistics (percentile)
-----------------------------
-- | To choose a LevelLink strategy based an a given Level
shouldLink :: (Level,Level) -> PhyloGroup -> PhyloGroup -> Bool
shouldLink (lvl,lvl') g g'
| (lvl <= 1) && (lvl' <= 1) = doesContainsOrd (getGroupNgrams g) (getGroupNgrams g')
| otherwise = elem (getGroupId g) (getGroupLevelChildsId g')
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups (lvl,lvl') current targets
| lvl < lvl' = setLevelParents current
| lvl > lvl' = setLevelChilds current
| otherwise = current
linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
where
--------------------------------------
setLevelChilds :: PhyloGroup -> PhyloGroup
setLevelChilds = over (phylo_groupLevelChilds) addPointers
--------------------------------------
setLevelParents :: PhyloGroup -> PhyloGroup
setLevelParents = over (phylo_groupLevelParents) addPointers
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target ->
if shouldLink (lvl,lvl') current target
if (elem (getGroupId current) (getGroupLevelChildsId target))
then Just ((getGroupId target),1)
else Nothing) targets
--------------------------------------
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
map (\group -> if getGroupLevel group == lvl
then linkGroupToGroups (lvl,lvl') group
$ filterCandidates group
$ getGroupsWithFilters lvl' (getGroupPeriod group) p
else group) groups) p
setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
$ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
-------------------------------
......@@ -83,10 +64,10 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\groups ->
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods :: Filiation -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to' id l = case to' of
Descendant -> (tail . snd) next
Ascendant -> (reverse . fst) next
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to' limit id l = case to' of
Descendant -> take limit $ (tail . snd) next
Ascendant -> take limit $ (reverse . fst) next
_ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
where
--------------------------------------
......@@ -115,45 +96,102 @@ processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
_ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity :: Double -> Proximity -> Bool
filterProximity score prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
|| ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
$ listToPairs
$ filter (\g' -> (elem (getGroupPeriod g') prds)
&& ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
&& (((last' "makePairs" prds) == (getGroupPeriod g))
||((matchWithPairs g (g,g') p) >= (getThreshold $ getPhyloProximity p))))
$ getGroupsWithLevel (getGroupLevel g) p
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> [(Date,Date)] -> [PhyloGroup] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates filiation depth limit proximity periods candidates g1 phylo
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> [(Date,Date)] -> PhyloGroup -> Phylo -> ([Pointer],[Double])
findBestCandidates filiation depth limit proximity periods g1 phylo
| depth > limit || null nextPeriods = ([],[])
| (not . null) pointers = (head' "findBestCandidates" $ groupBy (\x y -> snd x == snd y) pointers
,map snd similarities)
| otherwise = findBestCandidates filiation (depth + 1) limit proximity periods candidates g1 phylo
| otherwise = findBestCandidates filiation (depth + 1) limit proximity periods g1 phylo
where
--------------------------------------
pointers :: [(PhyloGroupId, Double)]
pointers = reverse $ sortOn snd $ filter (\(_,score) -> case proximity of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
) similarities
pointers = reverse $ sortOn snd $ filter (\(_,score) -> filterProximity score proximity) similarities
--------------------------------------
similarities :: [(PhyloGroupId, Double)]
similarities = concat $ map (\(g2,g3) -> let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
cooc' = unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
ngrams' = union (getGroupNgrams g2) (getGroupNgrams g3)
score = processProximity proximity nbDocs cooc cooc' ngrams ngrams'
in nub $ [(getGroupId g2,score),(getGroupId g3,score)]) pairsOfCandidates
similarities = concat $ map (\(g2,g3) ->
let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] phylo
cooc' = if (g2 == g3)
then getGroupCooc g2
else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
ngrams' = if (g2 == g3)
then getGroupNgrams g2
else union (getGroupNgrams g2) (getGroupNgrams g3)
score = processProximity proximity nbDocs (getGroupCooc g1) cooc' (getGroupNgrams g1) ngrams'
in if (g2 == g3)
then [(getGroupId g2,score)]
else [(getGroupId g2,score),(getGroupId g3,score)] ) pairsOfCandidates
--------------------------------------
pairsOfCandidates :: [(PhyloGroup,PhyloGroup)]
pairsOfCandidates = listToFullCombi $ filter (\g -> elem (getGroupPeriod g) nextPeriods) candidates
--------------------------------------
cooc :: Map (Int,Int) Double
cooc = getGroupCooc g1
--------------------------------------
ngrams :: [Int]
ngrams = getGroupNgrams g1
pairsOfCandidates = makePairs nextPeriods g1 phylo
--------------------------------------
nextPeriods :: [(Date,Date)]
nextPeriods = take depth periods
--------------------------------------
--------------------------------------
matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
matchWithPairs g1 (g2,g3) p =
let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
cooc = if (g2 == g3)
then getGroupCooc g2
else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
ngrams = if (g2 == g3)
then getGroupNgrams g2
else union (getGroupNgrams g2) (getGroupNgrams g3)
in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of
Nothing -> []
Just pts -> head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers :: Maybe [Pointer]
pointers = find (not . null)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame ->
let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
$ concat
$ map (\(t,t') ->
let proxi = matchWithPairs g (t,t') p
in
if (t == t')
then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods
--------------------------------------
findBestCandidates' :: Proximity -> [PhyloGroup] -> PhyloGroup -> Phylo -> [Pointer]
......@@ -204,9 +242,10 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLeve
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
-- | a init avec la [[head groups]] et la tail groups
......@@ -236,26 +275,24 @@ toBranches mem gs
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) debug $ updateGroups fil lvl pointersMap p
interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
where
--------------------------------------
debug :: [Double]
debug = sort $ concat $ map (snd . snd) pointers
-- debug :: [Pointers]
-- debug = concat $ map (snd) pointers
--------------------------------------
pointersMap :: Map PhyloGroupId [Pointer]
pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
-- pointersMap :: Map PhyloGroupId [Pointer]
-- pointersMap = Map.fromList $ map (\(id,x) -> (id,fst x)) pointers
--------------------------------------
pointers :: [(PhyloGroupId,([Pointer],[Double]))]
pointers = concat
$ map (\branche ->
map (\g -> ( getGroupId g
, findBestCandidates fil 1 (getPhyloMatchingFrame p) prox (getNextPeriods fil (getGroupPeriod g) (getPhyloPeriods p)) (filterCandidates g branche) g p )
) branche ) branches
pointers :: [(PhyloGroupId,[Pointer])]
pointers =
let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
in (getGroupId g, phyloGroupMatching periods g p)) groups
pts' = pts `using` parList rdeepseq
in pts'
--------------------------------------
branches :: [[PhyloGroup]]
branches = tracePreBranches
$ toBranches [[head' "interTempoMatching" (getGroupsWithLevel lvl p)]]
$ tail (getGroupsWithLevel lvl p)
groups :: [PhyloGroup]
groups = getGroupsWithLevel lvl p
--------------------------------------
......@@ -265,12 +302,10 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) de
toLevelUp :: [Pointer] -> Phylo -> [Pointer]
toLevelUp lst p = Map.toList
$ map (\ws -> maximum ws)
$ fromListWith (++) [(id, [w]) | (id, w) <- pointers]
where
--------------------------------------
pointers :: [Pointer]
pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
--------------------------------------
$ fromListWith (++) [(id, [w]) | (id, w) <-
let pointers = map (\(id,v) -> (getGroupLevelParentId $ getGroupFromId id p, v)) lst
pointers' = pointers `using` parList rdeepseq
in pointers' ]
-- | Transpose the parent/child pointers from one level to another
......@@ -278,16 +313,14 @@ transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterGroupWithLevel
(\g ->
--------------------------------------
let childs = getGroupsFromIds (map fst $ getGroupLevelChilds g) p
ascLink = toLevelUp (concat $ map getGroupPeriodParents childs) p
desLink = toLevelUp (concat $ map getGroupPeriodChilds childs) p
let ascLink = toLevelUp (getGroupPeriodParents g) p
desLink = toLevelUp (getGroupPeriodChilds g) p
--------------------------------------
in g & phylo_groupPeriodParents %~ (++ ascLink)
& phylo_groupPeriodChilds %~ (++ desLink)
in g & phylo_groupPeriodParents .~ ascLink
& phylo_groupPeriodChilds .~ desLink
--------------------------------------
) lvl p
----------------
-- | Tracer | --
----------------
......@@ -301,6 +334,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
<> "with sizes : " <> show (map length bs) <> "\n") bs
......
......@@ -18,36 +18,20 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (last,concat,null,nub,(++),init,tail,elemIndex,groupBy,(!!))
import Data.List (concat,null,nub,(++),elemIndex,groupBy,(!!), (\\), union, intersect)
import Data.Map (fromList,mapKeys)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp :: Int -> PhyloGroup -> GroupGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp idx curr (nodes,edges) next memo
| null nodes' && null next' = memo'
| (not . null) next' = relatedComp idx (head' "relatedComp1" next' ) (nodes' ,edges) (tail next') memo'
| otherwise = relatedComp (idx + 1) (head' "relatedComp2" nodes') (tail nodes',edges) [] memo'
where
--------------------------------------
memo' :: [[PhyloGroup]]
memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
--------------------------------------
relatedComp :: [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
......
......@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
listToEqualCombi :: Eq a => [a] -> [(a,a)]
listToEqualCombi l = [(x,y) | x <- l, y <- l, x == y]
listToPairs :: Eq a => [a] -> [(a,a)]
listToPairs l = (listToEqualCombi l) ++ (listToUnDirectedCombi l)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
......@@ -195,6 +202,9 @@ getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloProximity :: Phylo -> Proximity
getPhyloProximity p = _q_interTemporalMatching $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchThr :: Phylo -> Double
getPhyloReBranchThr p = _q_reBranchThr $ _phyloParam_query $ getPhyloParams p
......@@ -243,7 +253,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse
) (\g -> if getGroupLevel g == lvl
then f g
else g ) p
else g ) p
-- | To alter each list of PhyloGroups following a given function
......
......@@ -35,6 +35,7 @@ extra-deps:
- KMP-0.1.0.2
- accelerate-1.2.0.0
- aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4
- duckling-0.1.3.0
- full-text-search-0.2.1.4
- fullstop-0.1.4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment