[phylo] some tests with using different par strategies

parent 766a8c4a
Pipeline #7768 passed with stages
in 41 minutes and 6 seconds
...@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where ...@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parMap, rpar, Strategy) import Control.Parallel.Strategies (parMap, rpar, rdeepseq)
import Data.Containers.ListUtils (nubOrd) import Data.Containers.ListUtils (nubOrd)
import Data.Discrimination qualified as D import Data.Discrimination qualified as D
import Data.List (partition, intersect, tail) import Data.List (partition, intersect, tail)
...@@ -39,11 +39,6 @@ import Gargantext.Prelude hiding (empty, toList) ...@@ -39,11 +39,6 @@ import Gargantext.Prelude hiding (empty, toList)
defaultStrategy :: Strategy a
defaultStrategy = rpar
------------------ ------------------
-- | To Phylo | -- -- | To Phylo | --
------------------ ------------------
...@@ -142,7 +137,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -142,7 +137,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-------- --------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double] qua :: [Double]
qua = parMap defaultStrategy (\thr -> qua = parMap rdeepseq (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph let edges = filter (\edge -> snd edge >= thr) graph
nodes = nubOrd $ concat $ map (\((n,n'),_) -> [n,n']) edges nodes = nubOrd $ concat $ map (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges branches = toRelatedComponents nodes edges
...@@ -183,7 +178,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -183,7 +178,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
docs = filterDocs (getDocsByDate phylo) ([period] ++ next) docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next) diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel -- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = parMap defaultStrategy (\source -> pairs = parMap rdeepseq (\source ->
let candidates = filter (\target -> (> 2) $ length let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target -> in map (\target ->
...@@ -321,7 +316,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= ...@@ -321,7 +316,7 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m = filterCliqueByNested m =
let clq = parMap defaultStrategy (\l -> let clq = parMap rpar (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem then mem
else else
...@@ -349,7 +344,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -349,7 +344,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
seriesOfClustering :: Map (Date,Date) [Clustering] seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = parMap defaultStrategy (\(prd,docs) -> let fis = parMap rpar (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of case (corpusParser $ getConfig phylo) of
Tsv' _ -> let lst = toList Tsv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
...@@ -361,7 +356,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -361,7 +356,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
$ toList phyloDocs $ toList phyloDocs
in fromList fis in fromList fis
MaxClique _ thr filterType -> MaxClique _ thr filterType ->
let mcl = parMap defaultStrategy (\(prd,docs) -> let mcl = parMap rdeepseq (\(prd,docs) ->
let cooc = map round let cooc = map round
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
...@@ -413,7 +408,7 @@ groupDocsByPeriodRec f prds docs acc = ...@@ -413,7 +408,7 @@ groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs = groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap defaultStrategy (inPeriode f docs') pds periods = parMap rpar (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show(length docs) <> show(length docs)
<> " docs by " <> " docs by "
...@@ -431,7 +426,7 @@ groupDocsByPeriod' f pds docs = ...@@ -431,7 +426,7 @@ groupDocsByPeriod' f pds docs =
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = groupDocsByPeriod f pds es =
let periods = parMap defaultStrategy (inPeriode f es) pds let periods = parMap rpar (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show(length es) <> " docs by " <> show(length es) <> " docs by "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment