Commit 63e3a6fd authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Haddock documentation ok

parent 43fe729b
Pipeline #921 failed with stage
...@@ -27,13 +27,13 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -27,13 +27,13 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
arbitrary = JobOutput <$> arbitrary arbitrary = JobOutput <$> arbitrary
-- | Main Types -- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = All data ExternalAPIs = All
| PubMed | PubMed
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
deriving (Show, Eq, Enum, Bounded, Generic) deriving (Show, Eq, Enum, Bounded, Generic)
-- | IsidoreAuth
-- | Main Instances -- | Main Instances
......
...@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t) ...@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]] uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation)) uniText = map (List.filter (not . isPunctuation))
. map tokenize . map tokenize
. sentences -- | TODO get sentences according to lang . sentences -- TODO get sentences according to lang
. Text.toLower . Text.toLower
...@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst ...@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst
| otherwise = x | otherwise = x
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
type Entropy e = type Entropy e =
( Fractional e ( Fractional e
, Floating e , Floating e
, P.RealFloat e , P.RealFloat e
, Show e , Show e
-- ^ TODO: only used for debugging
) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Example and tests for development -- | Example and tests for development
......
...@@ -65,11 +65,13 @@ data SeaElevation = ...@@ -65,11 +65,13 @@ data SeaElevation =
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double -- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching -- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double -- , _wlj_elevation :: Double
-}
} }
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
......
...@@ -235,13 +235,16 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m ...@@ -235,13 +235,16 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\] -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
-- --
distributional :: Matrix Int -> Matrix Double distributional :: Matrix Int -> Matrix Double
distributional m = run -- $ matMiniMax distributional m = run {- -- $ matMiniMax
-- $ ri -- $ ri
-- $ myMin -- $ myMin
-}
$ filter' 0 $ filter' 0
$ s_mi $ s_mi
$ map fromIntegral -- ^ from Int to Double $ map fromIntegral
$ use m -- ^ push matrix in Accelerate type {- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
where where
-- filter m = zipWith (\a b -> max a b) m (transpose m) -- filter m = zipWith (\a b -> max a b) m (transpose m)
......
...@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p ...@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus | otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1) $ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
-- $ transposePeriodLinks (lvl + 1) -- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant $ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant $ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant $ traceTranspose (lvl + 1) Ascendant
...@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p ...@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1 Fis (FisParams k s t) -> traceBranches 1
-- $ reLinkPhyloBranches 1 -- \$ reLinkPhyloBranches 1
-- $ traceBranches 1 -- \$ traceBranches 1
$ setPhyloBranches 1 $ setPhyloBranches 1
$ traceTempoMatching Descendant 1 $ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1 $ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox $ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1 $ tracePhyloN 1
-- $ setLevelLinks (0,1) -- \$ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis) $ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where where
......
...@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer] ...@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of phyloGroupMatching periods g p = case pointers of
Nothing -> [] Nothing -> []
Just pts -> head' "phyloGroupMatching" Just pts -> head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity -- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt') $ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts $ reverse $ sortOn snd pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold -- Find the first time frame where at leats one pointer satisfies the proximity threshold
where where
-------------------------------------- --------------------------------------
pointers :: Maybe [Pointer] pointers :: Maybe [Pointer]
pointers = find (not . null) pointers = find (not . null)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups -- For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame -> $ scanl (\acc frame ->
let pairs = makePairs frame g p let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p)) in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
...@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of ...@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of
if (t == t') if (t == t')
then [(getGroupId t,proxi)] then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) [] else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years -- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods $ inits periods
-------------------------------------- --------------------------------------
...@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p ...@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Make links from Period to Period after level 1 -- | Make links from Period to Period after level 1
listToTuple :: (a -> b) -> [a] -> [(b,a)] listToTuple :: (a -> b) -> [a] -> [(b,a)]
listToTuple f l = map (\x -> (f x, x)) l listToTuple f l = map (\x -> (f x, x)) l
......
...@@ -90,13 +90,13 @@ findDynamics n pv pn m = ...@@ -90,13 +90,13 @@ findDynamics n pv pn m =
bid = fromJust $ (pn ^. pn_bid) bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | emergence -- emergence
then 2 then 2
else if ((fst prd) == (fst $ m ! n)) else if ((fst prd) == (fst $ m ! n))
-- | recombination -- recombination
then 0 then 0
else if (not $ sharedWithParents (fst prd) bid n pv) else if (not $ sharedWithParents (fst prd) bid n pv)
-- | decrease -- decrease
then 1 then 1
else 3 else 3
...@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s ...@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s
inter :: Map (Int, Int) Double inter :: Map (Int, Int) Double
inter = intersection f1 f2 inter = intersection f1 f2
-------------------------------------- --------------------------------------
...@@ -171,12 +171,12 @@ exportToDot phylo export = ...@@ -171,12 +171,12 @@ exportToDot phylo export =
<> "##########################") $ <> "##########################") $
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
-- | 1) init the dot graph {- 1) init the dot graph -}
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))] graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio , Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]] , Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes {-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
...@@ -185,36 +185,36 @@ exportToDot phylo export = ...@@ -185,36 +185,36 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
]) ])
{-
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels -- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
{-
-- | 3) group the branches by hierarchy -- 3) group the branches by hierarchy
-- mapM (\branches -> -- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do -- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank] -- graphAttrs [Rank SameRank]
-- -- | 4) create a node for each branch -- -- 4) create a node for each branch
-- mapM branchToDotNode branches -- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-}
mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
-- | 5) create a layer for each period {-- 5) create a layer for each period -}
_ <- mapM (\period -> _ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
periodToDotNode period periodToDotNode period
-- | 6) create a node for each group {-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups) mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
) $ getPeriodIds phylo ) $ getPeriodIds phylo
-- | 7) create the edges between a branch and its first groups {-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) -> _ <- mapM (\(bId,groups) ->
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
) )
...@@ -224,31 +224,29 @@ exportToDot phylo export = ...@@ -224,31 +224,29 @@ exportToDot phylo export =
$ sortOn (fst . _phylo_groupPeriod) groups) $ sortOn (fst . _phylo_groupPeriod) groups)
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
-- | 8) create the edges between the groups {- 8) create the edges between the groups -}
_ <- mapM (\((k,k'),_) -> _ <- mapM (\((k,k'),_) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups ) $ (toList . mergePointers) $ export ^. export_groups
-- | 7) create the edges between the periods {- 7) create the edges between the periods -}
_ <- mapM (\(prd,prd') -> _ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
-- | 8) create the edges between the branches {- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
---------------- ----------------
-- | Filter | -- -- | Filter | --
---------------- ----------------
...@@ -439,13 +437,13 @@ toDynamics n parents g m = ...@@ -439,13 +437,13 @@ toDynamics n parents g m =
let prd = g ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease {- decrease -}
then 2 then 2
else if ((fst prd) == (fst $ m ! n)) else if ((fst prd) == (fst $ m ! n))
-- | recombination {- recombination -}
then 0 then 0
else if isNew else if isNew
-- | emergence {- emergence -}
then 1 then 1
else 3 else 3
where where
......
...@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs = ...@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs =
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree]) (1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] []
...@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of ...@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
--------------------------- ---------------------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique keep thr f m = case keep of filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- To filter Fis with small Support
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size -- To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = map (\l ->
...@@ -173,16 +173,16 @@ filterCliqueByNested m = ...@@ -173,16 +173,16 @@ filterCliqueByNested m =
in fromList $ zip (keys m) clq' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters -- To transform a time map of docs innto a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique] toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
-- $ traceFis "Filtered by clique size" {- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize) $ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support" {- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport) $ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis" {- \$ traceFis "Unfiltered Fis" -}
phyloClique phyloClique
MaxClique _ -> undefined MaxClique _ -> undefined
where where
...@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
-------------------- --------------------
-- | To transform the docs into a time map of coocurency matrix -- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt = docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
...@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt = ...@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt =
-- | to Phylo Base | -- -- | to Phylo Base | --
----------------------- -----------------------
-- | To group a list of Documents by fixed periods -- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs = groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
...@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs = ...@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs =
-- | To group a list of Documents by fixed periods -- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = groupDocsByPeriod f pds es =
...@@ -265,7 +265,7 @@ docsToTermFreq docs fdt = ...@@ -265,7 +265,7 @@ docsToTermFreq docs fdt =
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
-- | To count the number of docs by unit of time -- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
...@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId = ...@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
-- | To init the basic elements of a Phylo -- To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf = toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
......
...@@ -36,13 +36,13 @@ import qualified Data.Set as Set ...@@ -36,13 +36,13 @@ import qualified Data.Set as Set
mergeBranchIds :: [[Int]] -> [Int] mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where where
-- | 2) find the most Up Left ids in the hierarchy of similarity -- 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]] -- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' = -- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids' -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds -- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids -- 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]] mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' = mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
...@@ -58,12 +58,12 @@ mergeMeta bId groups = ...@@ -58,12 +58,12 @@ mergeMeta bId groups =
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups = groupsToBranches' groups =
-- | run the related component algorithm -- run the related component algorithm
let egos = map (\g -> [getGroupId g] let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents) ++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups ++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
graph = relatedComponents egos graph = relatedComponents egos
-- | update each group's branch id -- update each group's branch id
in map (\ids -> in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids) let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
...@@ -103,26 +103,26 @@ toNextLevel' phylo groups = ...@@ -103,26 +103,26 @@ toNextLevel' phylo groups =
newGroups = concat $ groupsToBranches' newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g)) $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group -- 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups' let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [parent]) [] in acc ++ [parent]) []
-- | 3) group the current groups by parentId -- 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- | 6) update each period at curLvl + 1 -- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1))) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- | 7) by adding the parents -- 7) by adding the parents
(\phyloLvl -> (\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newPeriods if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod)) .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl) else phyloLvl)
-- | 2) add the curLvl + 1 phyloLevel to the phylo -- 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1) $ addPhyloLevel (curLvl + 1)
-- | 1) update the current groups (with level parent pointers) in the phylo -- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
-------------------- --------------------
...@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), ...@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch = reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups -- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd] let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp -> in map (\comp ->
-- | 4) add to each groups their futur level parent group -- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp ) in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- |3) reduce the graph a a set of related components -- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
...@@ -251,4 +251,4 @@ synchronicClustering phylo = ...@@ -251,4 +251,4 @@ synchronicClustering phylo =
-- <> "\n" -- <> "\n"
-- ) "" edges -- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch) -- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment