[phylo] small, mechanical refactorings

parent f54f2036
Pipeline #7649 failed with stages
in 97 minutes and 44 seconds
......@@ -120,8 +120,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
where
--------
-- 2) find the local maxima in the quality distribution
-- TODO (seeg, #471) head throws errors when list is too short.
-- I propose this implementation, but I'm not sure of the length of the list
-- TODO (seeg, #471) head throws errors when list is too short
-- (i.e. List.head . List.tail requires at least 2 elements in the
-- list). I propose this implementation, but I'm not sure of the
-- length of the list
-- maxima = if List.length qua' > 1 then
-- [snd (List.head qua') > snd (List.head $ List.tail qua')] ++
-- (findMaxima qua') ++
......@@ -134,9 +136,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-- 1.2)
qua' :: [(Double,Double)]
qua' = foldl (\acc (s,q) ->
if length acc == 0
if null acc
then [(s,q)]
else if (snd (List.last acc)) == q
else if snd (List.last acc) == q
then acc
else acc ++ [(s,q)]
) [] $ zip (Set.toList similarities) qua
......@@ -145,10 +147,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
qua :: [Double]
qua = parMap rpar (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph
nodes = nubOrd $ concat $ map (\((n,n'),_) -> [n,n']) edges
nodes = nubOrd $ concatMap (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges
in toPhyloQuality nbFdt lambda freq branches
) $ (Set.toList similarities)
) $ Set.toList similarities
{-
......@@ -416,9 +418,9 @@ groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group "
<> show(length docs)
<> show (length docs)
<> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text)
<> show (length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods
where
--------------------------------------
......@@ -435,8 +437,8 @@ groupDocsByPeriod f pds es =
let periods = parMap rpar (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group "
<> show(length es) <> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text)
<> show (length es) <> " docs by "
<> show (length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods
where
--------------------------------------
......@@ -492,9 +494,9 @@ docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in tracePhylo ("\n" <> "-- | Group "
<> show(length docs)
<> show (length docs)
<> " docs by "
<> show(length time)
<> show (length time)
<> " unit of time" <> "\n" :: Text)
$ unionWith (+) time docs'
......@@ -548,9 +550,9 @@ initPhylo docs conf =
else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in tracePhylo ("\n" <> "-- | Init a phylo out of "
<> show(length docs) <> " docs \n" :: Text)
<> show (length docs) <> " docs \n" :: Text)
$ tracePhylo ("\n" <> "-- | lambda "
<> show(_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text)
<> show (_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text)
$ Phylo foundations
docsSources
docsCounts
......
......@@ -11,11 +11,10 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level)
import Control.Lens ( over, filtered, view, (%~) )
import Data.List (union, nub, init, tail, partition, nubBy, (!!))
import Data.List qualified as List
import Data.Map (elems, empty, fromList, findWithDefault, unionWith, keys, member, (!), filterWithKey, fromListWith, restrictKeys)
......@@ -30,7 +29,7 @@ import Data.Vector (Vector, elemIndex)
import Data.Vector qualified as Vector
import Gargantext.Core.Viz.Phylo
import Gargantext.Prelude hiding (empty)
import Text.Printf
import Text.Printf ( PrintfArg, printf )
------------
-- | Io | --
......@@ -68,7 +67,7 @@ truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
if member k m
then m ! k
else 0
......@@ -85,18 +84,16 @@ dropByIdx k l = take k l ++ drop (k+1) l
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
elemIndex' e l = case List.elemIndex e l of
Nothing -> panic "[ERR][Viz.Phylo.PhyloTools] element not in list"
Just i -> i
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc =
if (null lst || null lst')
then acc
else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
else acc
commonPrefix lst lst' acc
| null lst || null lst' = acc
| head' "commonPrefix" lst == head' "commonPrefix" lst' = commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
| otherwise = acc
---------------------
......@@ -118,13 +115,13 @@ sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concatMap (\n -> ["|",n]) (ngramsToText ngrams l)
idxToLabel :: [Int] -> String
idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
idxToLabel l = List.unwords $ tail' "idxToLabel" $ concatMap (\n -> ["|",show n]) l
idxToLabel' :: [Double] -> String
idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concatMap (\n -> ["|",show n]) l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
......@@ -137,8 +134,7 @@ ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
periodsToYears periods = (Set.fromList . sort) (concatMap (\(d,d') -> [d..d']) periods)
findBounds :: [Date] -> (Date,Date)
......@@ -159,7 +155,7 @@ toFstDate ds = snd
$ head' "firstDate"
$ sortOn fst
$ map (\d ->
let d' = fromMaybe (error "toFstDate") $ readMaybe (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
let d' = fromMaybe (error "toFstDate") $ readMaybe (filter (\c -> c `notElem` ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds
toLstDate :: [Text] -> Text
......@@ -168,12 +164,12 @@ toLstDate ds = snd
$ reverse
$ sortOn fst
$ map (\d ->
let d' = fromMaybe (error "toLstDate") $ readMaybe (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
let d' = fromMaybe (error "toLstDate") $ readMaybe (filter (\c -> c `notElem` ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds
getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of
getTimeScale p = case timeUnit $ getConfig p of
Epoch {} -> "epoch"
Year {} -> "year"
Month {} -> "month"
......@@ -228,21 +224,21 @@ isNested l l'
-- | To filter Fis with small Support but by keeping non empty Periods
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l = if (null $ f thr l) && (not $ null l)
keepFilled f thr l = if null (f thr l) && not (null l)
then keepFilled f (thr - 1) l
else f thr l
-- | General workhorse to use in lieu of /trace/. It decides at compile
-- time whether or not debug logs are enabled.
tracePhylo :: (Print s, IsString s) => s -> a -> a
#if NO_PHYLO_DEBUG_LOGS
tracePhylo _ p = p
#else
tracePhylo msg p = trace msg p
#endif
traceClique :: Map (Date, Date) [Clustering] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show cpt <> ") " ) "" [1..6]
where
--------------------------------------
cliques :: [Double]
......@@ -251,7 +247,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
traceSupport :: Map (Date, Date) [Clustering] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show cpt <> ") " ) "" [1..6]
where
--------------------------------------
supports :: [Double]
......@@ -321,7 +317,7 @@ ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
in filterWithKey (\k _ -> k `elem` pairs) cooc
-----------------
......@@ -333,14 +329,14 @@ ngramsToCooc ngrams coocs =
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
ngramsToDensity :: [Int] -> [Cooc] -> (Map Int Double) -> Double
ngramsToDensity :: [Int] -> [Cooc] -> Map Int Double -> Double
ngramsToDensity ngrams coocs rootsCount =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToCombi' ngrams
density = map (\(i,j) ->
let nij = findWithDefault 0 (i,j) cooc
in (nij * nij) / ((rootsCount ! i) * (rootsCount ! j))) pairs
in (sum density) / (fromIntegral $ length ngrams)
in sum density / fromIntegral (length ngrams)
......@@ -351,14 +347,15 @@ ngramsToDensity ngrams coocs rootsCount =
-- | find the local maxima in a list of values
findMaxima :: [(Double,Double)] -> [Bool]
findMaxima lst = map (hasMax) $ toChunk 3 lst
findMaxima lst = map hasMax $ toChunk 3 lst
where
------
hasMax :: [(Double,Double)] -> Bool
hasMax chunk =
if (length chunk) /= 3
then False
else (snd(chunk !! 0) < snd(chunk !! 1)) && (snd(chunk !! 2) < snd(chunk !! 1))
(length chunk == 3) &&
(let snds = snd <$> chunk
in
(snds !! 0 < snds !! 1) && (snds !! 2 < snds !! 1))
-- | split a list into chunks of size n
......@@ -555,19 +552,19 @@ getLastRootsFreq phylo = lastRootsFreq (phylo ^. phylo_counts)
setConfig :: PhyloConfig -> Phylo -> Phylo
setConfig config phylo = phylo
& phylo_param .~ (PhyloParam
((phylo ^. phylo_param) ^. phyloParam_version)
((phylo ^. phylo_param) ^. phyloParam_software)
config)
& phylo_param .~ PhyloParam {
_phyloParam_version = phylo ^. (phylo_param . phyloParam_version)
, _phyloParam_software = phylo ^. (phylo_param . phyloParam_software)
, _phyloParam_config = config }
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getRoots phylo = phylo ^. (phylo_foundations . foundations_roots)
getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
getRootsInGroups phylo = phylo ^. (phylo_foundations . foundations_rootsInGroups)
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
......@@ -594,7 +591,7 @@ getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromScalePeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. filtered (\phyloPrd -> (phyloPrd ^. phylo_periodPeriod) `elem` periods)
. phylo_periodScales
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
......@@ -704,7 +701,7 @@ relatedComponents graph = foldl' (\branches groups ->
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges =
let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
clusters = relatedComponents $ (map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges ++ (map (\g -> [getGroupId g]) nodes))
in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
......@@ -746,15 +743,13 @@ getMinSharedNgrams proxi = case proxi of
----------------
intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
intersectInit acc lst lst' =
if (null lst) || (null lst')
then acc
else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc
intersectInit acc lst lst'
| null lst || null lst' = acc
| head' "intersectInit" lst == head' "intersectInit" lst' = intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
| otherwise = acc
branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * fromIntegral (length $ intersectInit [] (snd id) (snd id'))
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
......@@ -769,7 +764,7 @@ traceMatchSuccess thr qua qua' nextBranches =
<> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
<> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
<> " - for the local threshold " <> show (thr)
<> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" :: Text
<> " ( quality : " <> show (qua) <> " < " <> show (qua') <> ")\n" :: Text
) nextBranches
......@@ -780,7 +775,7 @@ traceMatchFailure thr qua qua' branches =
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - split with failure for the local threshold " <> show (thr)
<> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n" :: Text
<> " ( quality : " <> show (qua) <> " > " <> show (qua') <> ")\n" :: Text
) branches
......@@ -812,9 +807,9 @@ traceMatchEnd groups =
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups =
tracePhylo ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n" :: Text ) groups
tracePhylo ( "\n" <> "-- | Start temporal matching for " <> show (length groups) <> " groups" <> "\n" :: Text ) groups
traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l =
tracePhylo ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n" :: Text ) l
tracePhylo ( "\n" <> "-- | " <> show (List.length l) <> " computed pairs of groups Similarity" <> "\n" :: Text ) l
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment