[hylo] small refactorings to make the code more readable

parent f48c9a41
Pipeline #7838 failed with stages
in 18 minutes and 5 seconds
...@@ -17,7 +17,6 @@ TODO: ...@@ -17,7 +17,6 @@ TODO:
-} -}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE MonoLocalBinds #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-} {-# OPTIONS_GHC -Wno-unused-top-binds #-}
......
...@@ -273,8 +273,7 @@ joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap p ...@@ -273,8 +273,7 @@ joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap p
-------------------------------------- --------------------------------------
rootsMap :: Map Int [PhyloGroupId] rootsMap :: Map Int [PhyloGroupId]
rootsMap = fromListWith (++) rootsMap = fromListWith (++)
$ concat -- flatten $ concatMap (\g ->
$ map (\g ->
map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g) map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
$ getGroupsFromScale 1 phylo $ getGroupsFromScale 1 phylo
...@@ -410,12 +409,12 @@ docsToTimeScaleCooc docs fdt = ...@@ -410,12 +409,12 @@ docsToTimeScaleCooc docs fdt =
-- TODO anoe -- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc] groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc = groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs)) case (prds, docs) of
then acc ([], _) -> acc
else (_, []) -> acc
let prd = head' "groupBy" prds (prd:prds', _) ->
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs let docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc) in groupDocsByPeriodRec f prds' (snd docs') (insert prd (fst docs') acc)
-- To group a list of Documents by fixed periods -- To group a list of Documents by fixed periods
...@@ -541,17 +540,18 @@ setDefault conf timeScale nbDocs = defaultConfig ...@@ -541,17 +540,18 @@ setDefault conf timeScale nbDocs = defaultConfig
-- --
initPhylo :: [Document] -> PhyloConfig -> Phylo initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf = initPhylo docs conf =
let roots = Vector.fromList $ D.nubWith T.unpack $ concat $ map text docs let roots = Vector.fromList $ D.nubWith T.unpack $ concatMap text docs
timeScale = head' "initPhylo" $ map docTime docs timeScale = head' "initPhylo" $ map docTime docs
foundations = PhyloFoundations roots empty foundations = PhyloFoundations { _foundations_roots = roots
docsSources = PhyloSources (Vector.fromList $ nubOrd $ concat $ map sources docs) , _foundations_rootsInGroups = empty }
docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots)) docsSources = PhyloSources {_sources = Vector.fromList $ nubOrd $ concatMap sources docs }
(docsToTimeScaleNb docs) docsCounts = PhyloCounts { coocByDate = docsToTimeScaleCooc docs roots
(docsToTimeTermCount docs (foundations ^. foundations_roots)) , docsByDate = docsToTimeScaleNb docs
(docsToTermCount docs (foundations ^. foundations_roots)) , rootsCountByDate = docsToTimeTermCount docs roots
(docsToTermFreq docs (foundations ^. foundations_roots)) , rootsCount = docsToTermCount docs roots
(docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots)) , rootsFreq = docsToTermFreq docs roots
params = if (defaultMode conf) , lastRootsFreq = docsToLastTermFreq (getTimePeriod timeScale) docs roots }
params = if defaultMode conf
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) } then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale) periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
...@@ -559,12 +559,15 @@ initPhylo docs conf = ...@@ -559,12 +559,15 @@ initPhylo docs conf =
<> show (length docs) <> " docs \n" :: Text) <> show (length docs) <> " docs \n" :: Text)
$ tracePhylo ("\n" <> "-- | lambda " $ tracePhylo ("\n" <> "-- | lambda "
<> show (_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text) <> show (_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text)
$ Phylo foundations $ Phylo { _phylo_foundations = foundations
docsSources , _phylo_sources = docsSources
docsCounts , _phylo_counts = docsCounts
[] , _phylo_seaLadder = []
params , _phylo_param = params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods) , _phylo_periods =
0 fromList $ map (\prd -> (prd, PhyloPeriod { _phylo_periodPeriod = prd
(_qua_granularity $ phyloQuality $ _phyloParam_config params) , _phylo_periodPeriodStr = ("", "")
Nothing , _phylo_periodScales = initPhyloScales 1 prd })) periods
, _phylo_quality = 0
, _phylo_level = _qua_granularity $ phyloQuality $ _phyloParam_config params
, _phylo_computeTime = Nothing }
...@@ -162,11 +162,9 @@ toFstDate ds = snd ...@@ -162,11 +162,9 @@ toFstDate ds = snd
toLstDate :: [Text] -> Text toLstDate :: [Text] -> Text
toLstDate ds = snd toLstDate ds = snd
$ head' "firstDate" $ head' "firstDate"
$ reverse $ sortOn (Down . fst) (map (\d ->
$ sortOn fst
$ map (\d ->
let d' = fromMaybe (error "toLstDate") $ readMaybe (filter (\c -> c `notElem` ['U','T','C',' ',':','-']) $ unpack d)::Int let d' = fromMaybe (error "toLstDate") $ readMaybe (filter (\c -> c `notElem` ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds in (d',d)) ds)
getTimeScale :: Phylo -> [Char] getTimeScale :: Phylo -> [Char]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment