[hylo] small refactorings to make the code more readable

parent f48c9a41
Pipeline #7838 failed with stages
in 18 minutes and 5 seconds
......@@ -17,7 +17,6 @@ TODO:
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
......
......@@ -273,8 +273,7 @@ joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap p
--------------------------------------
rootsMap :: Map Int [PhyloGroupId]
rootsMap = fromListWith (++)
$ concat -- flatten
$ map (\g ->
$ concatMap (\g ->
map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
$ getGroupsFromScale 1 phylo
......@@ -410,12 +409,12 @@ docsToTimeScaleCooc docs fdt =
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs))
then acc
else
let prd = head' "groupBy" prds
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
case (prds, docs) of
([], _) -> acc
(_, []) -> acc
(prd:prds', _) ->
let docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f prds' (snd docs') (insert prd (fst docs') acc)
-- To group a list of Documents by fixed periods
......@@ -541,17 +540,18 @@ setDefault conf timeScale nbDocs = defaultConfig
--
initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf =
let roots = Vector.fromList $ D.nubWith T.unpack $ concat $ map text docs
let roots = Vector.fromList $ D.nubWith T.unpack $ concatMap text docs
timeScale = head' "initPhylo" $ map docTime docs
foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nubOrd $ concat $ map sources docs)
docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
(docsToTimeTermCount docs (foundations ^. foundations_roots))
(docsToTermCount docs (foundations ^. foundations_roots))
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots))
params = if (defaultMode conf)
foundations = PhyloFoundations { _foundations_roots = roots
, _foundations_rootsInGroups = empty }
docsSources = PhyloSources {_sources = Vector.fromList $ nubOrd $ concatMap sources docs }
docsCounts = PhyloCounts { coocByDate = docsToTimeScaleCooc docs roots
, docsByDate = docsToTimeScaleNb docs
, rootsCountByDate = docsToTimeTermCount docs roots
, rootsCount = docsToTermCount docs roots
, rootsFreq = docsToTermFreq docs roots
, lastRootsFreq = docsToLastTermFreq (getTimePeriod timeScale) docs roots }
params = if defaultMode conf
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
......@@ -559,12 +559,15 @@ initPhylo docs conf =
<> show (length docs) <> " docs \n" :: Text)
$ tracePhylo ("\n" <> "-- | lambda "
<> show (_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text)
$ Phylo foundations
docsSources
docsCounts
[]
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods)
0
(_qua_granularity $ phyloQuality $ _phyloParam_config params)
Nothing
$ Phylo { _phylo_foundations = foundations
, _phylo_sources = docsSources
, _phylo_counts = docsCounts
, _phylo_seaLadder = []
, _phylo_param = params
, _phylo_periods =
fromList $ map (\prd -> (prd, PhyloPeriod { _phylo_periodPeriod = prd
, _phylo_periodPeriodStr = ("", "")
, _phylo_periodScales = initPhyloScales 1 prd })) periods
, _phylo_quality = 0
, _phylo_level = _qua_granularity $ phyloQuality $ _phyloParam_config params
, _phylo_computeTime = Nothing }
......@@ -162,11 +162,9 @@ toFstDate ds = snd
toLstDate :: [Text] -> Text
toLstDate ds = snd
$ head' "firstDate"
$ reverse
$ sortOn fst
$ map (\d ->
$ sortOn (Down . fst) (map (\d ->
let d' = fromMaybe (error "toLstDate") $ readMaybe (filter (\c -> c `notElem` ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds
in (d',d)) ds)
getTimeScale :: Phylo -> [Char]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment