[phylo] small refactorings to make `toSeriesOfClustering` more readable

parent 04c0505c
Pipeline #7843 passed with stages
in 55 minutes and 27 seconds
......@@ -341,44 +341,64 @@ filterCliqueByNested m =
-- | To transform a time map of docs into a time map of Fis with some filters
toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
toSeriesOfClustering phylo phyloDocs = case clique $ getConfig phylo of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize)
$ filterClique True s' filterCliqueBySize
{- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport)
$ filterClique True s filterCliqueBySupport
{- \$ traceFis "Unfiltered Fis" -}
seriesOfClustering
MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
seriesOfClustering
where
--------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = parMap rpar (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of
Tsv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
)
$ toList phyloDocs
in fromList fis
MaxClique _ thr filterType ->
let mcl = parMap rpar (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
in fromList mcl
(toSeriesOfClusteringFis phylo phyloDocs)
MaxClique s thr filterType -> filterClique True s filterCliqueBySize
(toSeriesOfClusteringMaxClique phylo phyloDocs (thr, filterType))
--------------------------------------
toSeriesOfClusteringFis :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClusteringFis phylo phyloDocs = fromList $ parMap rpar (func (corpusParser $ getConfig phylo)) (toList phyloDocs)
where
func (Tsv' _) (prd, docs) =
let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1
$ map (\d -> ( ngramsToIdx (text d) (getRoots phylo)
, ( weight d, sourcesToIdx (sources d) (getSources phylo)) )
) docs
in ( prd
, map (\f -> Clustering { _clustering_roots = Set.toList $ fst f
, _clustering_support = (fst . snd) f
, _clustering_period = prd
, _clustering_visWeighting = (fst . snd . snd) f
, _clustering_visFiltering = (snd . snd . snd) f } ) lst)
func _ (prd, docs) =
let lst = toList $
fisWithSizePolyMap (Segment 1 20) 1
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in ( prd
, map (\f -> Clustering { _clustering_roots = Set.toList $ fst f
, _clustering_support = snd f
, _clustering_period = prd
, _clustering_visWeighting = Just $ fromIntegral $ snd f
, _clustering_visFiltering = [] }) lst)
toSeriesOfClusteringMaxClique :: Phylo -> Map (Date, Date) [Document] -> (Double, MaxCliqueFilter) -> Map (Date,Date) [Clustering]
toSeriesOfClusteringMaxClique phylo phyloDocs (thr, filterType) = fromList mcl
where
mcl = parMap rpar (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map (\d -> listToMatrix $ ngramsToIdx (text d) (getRoots phylo)) docs
in ( prd
, map (\cl -> Clustering { _clustering_roots = cl
, _clustering_support = 0
, _clustering_period = prd
, _clustering_visWeighting = Nothing
, _clustering_visFiltering = [] })
$ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
--------------------------------------
-- dev viz graph maxClique getMaxClique
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment