[phylo] performance boost when there are many periods

parent aeb97dc2
Pipeline #7844 passed with stages
in 47 minutes and 36 seconds
......@@ -385,7 +385,14 @@ toSeriesOfClusteringFis phylo phyloDocs = fromList $ parMap rpar (func (corpusPa
toSeriesOfClusteringMaxClique :: Phylo -> Map (Date, Date) [Document] -> (Double, MaxCliqueFilter) -> Map (Date,Date) [Clustering]
toSeriesOfClusteringMaxClique phylo phyloDocs (thr, filterType) = fromList mcl
where
mcl = parMap rpar (\(prd,docs) ->
-- This looks innocent but is a big performance
-- improvement. Suppose there are few docs but many periods (e.g.
-- when one computes a phylo with second or minute
-- resolution): many periods will then have no docs associated.
-- Without pattern matching on empty docs, each of those empty
-- periods would still fire `getMaxCliques` with all the
-- Accelerate machinery.
mapFunc (prd, []) = (prd, [])
mapFunc (prd, docs) =
let cooc = map round
$ foldl sumCooc empty
$ map (\d -> listToMatrix $ ngramsToIdx (text d) (getRoots phylo)) docs
......@@ -395,8 +402,8 @@ toSeriesOfClusteringMaxClique phylo phyloDocs (thr, filterType) = fromList mcl
, _clustering_period = prd
, _clustering_visWeighting = Nothing
, _clustering_visFiltering = [] })
$ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
$ getMaxCliques filterType Conditional thr cooc)
mcl = parMap rpar mapFunc $ toList phyloDocs
--------------------------------------
-- dev viz graph maxClique getMaxClique
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment