[phylo] small refactorings to make `toSeriesOfClustering` more readable

parent 04c0505c
Pipeline #7843 passed with stages
in 55 minutes and 27 seconds
...@@ -341,43 +341,63 @@ filterCliqueByNested m = ...@@ -341,43 +341,63 @@ filterCliqueByNested m =
-- | To transform a time map of docs into a time map of Fis with some filters -- | To transform a time map of docs into a time map of Fis with some filters
toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering] toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of toSeriesOfClustering phylo phyloDocs = case clique $ getConfig phylo of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -} {- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize) $ filterClique True s' filterCliqueBySize
{- \$ traceFis "Filtered by support" -} {- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport) $ filterClique True s filterCliqueBySupport
{- \$ traceFis "Unfiltered Fis" -} {- \$ traceFis "Unfiltered Fis" -}
seriesOfClustering (toSeriesOfClusteringFis phylo phyloDocs)
MaxClique s _ _ -> filterClique True s (filterCliqueBySize) MaxClique s thr filterType -> filterClique True s filterCliqueBySize
seriesOfClustering (toSeriesOfClusteringMaxClique phylo phyloDocs (thr, filterType))
where
-------------------------------------- --------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of toSeriesOfClusteringFis :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
Fis _ _ -> toSeriesOfClusteringFis phylo phyloDocs = fromList $ parMap rpar (func (corpusParser $ getConfig phylo)) (toList phyloDocs)
let fis = parMap rpar (\(prd,docs) -> where
case (corpusParser $ getConfig phylo) of func (Tsv' _) (prd, docs) =
Tsv' _ -> let lst = toList let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst) $ map (\d -> ( ngramsToIdx (text d) (getRoots phylo)
_ -> let lst = toList , ( weight d, sourcesToIdx (sources d) (getSources phylo)) )
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs) ) docs
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst) in ( prd
) , map (\f -> Clustering { _clustering_roots = Set.toList $ fst f
$ toList phyloDocs , _clustering_support = (fst . snd) f
in fromList fis , _clustering_period = prd
MaxClique _ thr filterType -> , _clustering_visWeighting = (fst . snd . snd) f
let mcl = parMap rpar (\(prd,docs) -> , _clustering_visFiltering = (snd . snd . snd) f } ) lst)
func _ (prd, docs) =
let lst = toList $
fisWithSizePolyMap (Segment 1 20) 1
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in ( prd
, map (\f -> Clustering { _clustering_roots = Set.toList $ fst f
, _clustering_support = snd f
, _clustering_period = prd
, _clustering_visWeighting = Just $ fromIntegral $ snd f
, _clustering_visFiltering = [] }) lst)
toSeriesOfClusteringMaxClique :: Phylo -> Map (Date, Date) [Document] -> (Double, MaxCliqueFilter) -> Map (Date,Date) [Clustering]
toSeriesOfClusteringMaxClique phylo phyloDocs (thr, filterType) = fromList mcl
where
mcl = parMap rpar (\(prd,docs) ->
let cooc = map round let cooc = map round
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map (\d -> listToMatrix $ ngramsToIdx (text d) (getRoots phylo)) docs
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs in ( prd
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc)) , map (\cl -> Clustering { _clustering_roots = cl
, _clustering_support = 0
, _clustering_period = prd
, _clustering_visWeighting = Nothing
, _clustering_visFiltering = [] })
$ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs $ toList phyloDocs
in fromList mcl --------------------------------------
--------------------------------------
-- dev viz graph maxClique getMaxClique -- dev viz graph maxClique getMaxClique
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment