[phylo] small, mechanical refactorings

parent f54f2036
Pipeline #7649 failed with stages
in 97 minutes and 44 seconds
...@@ -120,8 +120,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -120,8 +120,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
where where
-------- --------
-- 2) find the local maxima in the quality distribution -- 2) find the local maxima in the quality distribution
-- TODO (seeg, #471) head throws errors when list is too short. -- TODO (seeg, #471) head throws errors when list is too short
-- I propose this implementation, but I'm not sure of the length of the list -- (i.e. List.head . List.tail requires at least 2 elements in the
-- list). I propose this implementation, but I'm not sure of the
-- length of the list
-- maxima = if List.length qua' > 1 then -- maxima = if List.length qua' > 1 then
-- [snd (List.head qua') > snd (List.head $ List.tail qua')] ++ -- [snd (List.head qua') > snd (List.head $ List.tail qua')] ++
-- (findMaxima qua') ++ -- (findMaxima qua') ++
...@@ -134,9 +136,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -134,9 +136,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-- 1.2) -- 1.2)
qua' :: [(Double,Double)] qua' :: [(Double,Double)]
qua' = foldl (\acc (s,q) -> qua' = foldl (\acc (s,q) ->
if length acc == 0 if null acc
then [(s,q)] then [(s,q)]
else if (snd (List.last acc)) == q else if snd (List.last acc) == q
then acc then acc
else acc ++ [(s,q)] else acc ++ [(s,q)]
) [] $ zip (Set.toList similarities) qua ) [] $ zip (Set.toList similarities) qua
...@@ -145,10 +147,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -145,10 +147,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
qua :: [Double] qua :: [Double]
qua = parMap rpar (\thr -> qua = parMap rpar (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph let edges = filter (\edge -> snd edge >= thr) graph
nodes = nubOrd $ concat $ map (\((n,n'),_) -> [n,n']) edges nodes = nubOrd $ concatMap (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges branches = toRelatedComponents nodes edges
in toPhyloQuality nbFdt lambda freq branches in toPhyloQuality nbFdt lambda freq branches
) $ (Set.toList similarities) ) $ Set.toList similarities
{- {-
...@@ -220,7 +222,7 @@ appendGroups f lvl m phylo = ...@@ -220,7 +222,7 @@ appendGroups f lvl m phylo =
-- select the cooc of the periods -- select the cooc of the periods
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId]) (elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])
-- select and merge the roots count of the periods -- select and merge the roots count of the periods
(foldl (\acc count -> unionWith (+) acc count) empty (foldl (\acc count -> unionWith (+) acc count) empty
$ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId])) $ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
...@@ -416,9 +418,9 @@ groupDocsByPeriod' f pds docs = ...@@ -416,9 +418,9 @@ groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds periods = parMap rpar (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show(length docs) <> show (length docs)
<> " docs by " <> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text) <> show (length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods $ fromList $ zip pds periods
where where
-------------------------------------- --------------------------------------
...@@ -435,8 +437,8 @@ groupDocsByPeriod f pds es = ...@@ -435,8 +437,8 @@ groupDocsByPeriod f pds es =
let periods = parMap rpar (inPeriode f es) pds let periods = parMap rpar (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show(length es) <> " docs by " <> show (length es) <> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text) <> show (length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods $ fromList $ zip pds periods
where where
-------------------------------------- --------------------------------------
...@@ -465,7 +467,7 @@ docsToTermCount docs roots = fromList ...@@ -465,7 +467,7 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double)) docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
docsToTimeTermCount docs roots = docsToTimeTermCount docs roots =
let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst)) let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
$ group $ D.sort l) $ group $ D.sort l)
$ fromListWith (++) $ fromListWith (++)
...@@ -492,9 +494,9 @@ docsToTimeScaleNb docs = ...@@ -492,9 +494,9 @@ docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1 time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in tracePhylo ("\n" <> "-- | Group " in tracePhylo ("\n" <> "-- | Group "
<> show(length docs) <> show (length docs)
<> " docs by " <> " docs by "
<> show(length time) <> show (length time)
<> " unit of time" <> "\n" :: Text) <> " unit of time" <> "\n" :: Text)
$ unionWith (+) time docs' $ unionWith (+) time docs'
...@@ -506,7 +508,7 @@ initPhyloScales lvlMax pId = ...@@ -506,7 +508,7 @@ initPhyloScales lvlMax pId =
setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig
setDefault conf timeScale nbDocs = defaultConfig setDefault conf timeScale nbDocs = defaultConfig
{ corpusPath = (corpusPath conf) { corpusPath = (corpusPath conf)
, listPath = (listPath conf) , listPath = (listPath conf)
, outputPath = (outputPath conf) , outputPath = (outputPath conf)
...@@ -515,11 +517,11 @@ setDefault conf timeScale nbDocs = defaultConfig ...@@ -515,11 +517,11 @@ setDefault conf timeScale nbDocs = defaultConfig
, phyloName = (phyloName conf) , phyloName = (phyloName conf)
, defaultMode = True , defaultMode = True
, timeUnit = timeScale , timeUnit = timeScale
, clique = Fis (toSupport nbDocs) 3} , clique = Fis (toSupport nbDocs) 3}
where where
-------------------------------------- --------------------------------------
toSupport :: Int -> Support toSupport :: Int -> Support
toSupport n toSupport n
| n < 500 = 1 | n < 500 = 1
| n < 1000 = 2 | n < 1000 = 2
| n < 2000 = 3 | n < 2000 = 3
...@@ -548,9 +550,9 @@ initPhylo docs conf = ...@@ -548,9 +550,9 @@ initPhylo docs conf =
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale) periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in tracePhylo ("\n" <> "-- | Init a phylo out of " in tracePhylo ("\n" <> "-- | Init a phylo out of "
<> show(length docs) <> " docs \n" :: Text) <> show (length docs) <> " docs \n" :: Text)
$ tracePhylo ("\n" <> "-- | lambda " $ tracePhylo ("\n" <> "-- | lambda "
<> show(_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text) <> show (_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text)
$ Phylo foundations $ Phylo foundations
docsSources docsSources
docsCounts docsCounts
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment