Commit 572e7fa2 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[MERGE] Phylo

parents 22b14f56 6502c4c6
...@@ -5,7 +5,7 @@ cabal-version: 1.12 ...@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.9.6.5 version: 0.0.6.9.9.6.5
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
......
...@@ -66,7 +66,7 @@ data SeaElevation = ...@@ -66,7 +66,7 @@ data SeaElevation =
| Adaptative | Adaptative
{ _adap_steps :: Double } { _adap_steps :: Double }
| Evolving | Evolving
{ _evol_neighborhood :: Bool } { _evol_neighborhood :: Bool }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation instance ToSchema SeaElevation
...@@ -78,8 +78,8 @@ data PhyloSimilarity = ...@@ -78,8 +78,8 @@ data PhyloSimilarity =
| WeightedLogSim | WeightedLogSim
{ _wls_sensibility :: Double { _wls_sensibility :: Double
, _wls_minSharedNgrams :: Int } , _wls_minSharedNgrams :: Int }
| Hamming | Hamming
{ _hmg_sensibility :: Double { _hmg_sensibility :: Double
, _hmg_minSharedNgrams :: Int} , _hmg_minSharedNgrams :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -207,7 +207,7 @@ data PhyloSubConfig = ...@@ -207,7 +207,7 @@ data PhyloSubConfig =
subConfig2config :: PhyloSubConfig -> PhyloConfig subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1 subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
...@@ -430,7 +430,6 @@ data Phylo = ...@@ -430,7 +430,6 @@ data Phylo =
instance ToSchema Phylo where instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
---------------- ----------------
-- | Period | -- -- | Period | --
---------------- ----------------
......
...@@ -188,4 +188,3 @@ instance ToParamSchema Metric ...@@ -188,4 +188,3 @@ instance ToParamSchema Metric
instance ToParamSchema Order instance ToParamSchema Order
instance ToParamSchema Sort instance ToParamSchema Sort
instance ToSchema Order instance ToSchema Order
...@@ -66,50 +66,58 @@ toAttr :: AttributeName -> Lazy.Text -> CustomAttribute ...@@ -66,50 +66,58 @@ toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
toAttr k v = customAttribute k v toAttr k v = customAttribute k v
metaToAttr :: Map Text.Text [Double] -> [CustomAttribute] metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta metaToAttr meta = map (\(k, v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
groupIdToDotId :: PhyloGroupId -> DotId groupIdToDotId :: PhyloGroupId -> DotId
groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx)) groupIdToDotId (((d, d'), lvl), idx) =
(fromStrict . Text.pack) $ "group" <> show d <> show d' <> show lvl <> show idx
branchIdToDotId :: PhyloBranchId -> DotId branchIdToDotId :: PhyloBranchId -> DotId
branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId)) branchIdToDotId bId = (fromStrict . Text.pack) $ "branch" <> show (snd bId)
periodIdToDotId :: Period -> DotId periodIdToDotId :: Period -> DotId
periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd)) periodIdToDotId prd = (fromStrict . Text.pack) $ "period" <> show (fst prd) <> show (snd prd)
groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
groupToTable fdt g = H.Table H.HTable groupToTable fdt g =
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft] H.Table H.HTable
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)] { H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableRows = [header] , H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]] , H.tableRows = [header]
<> ( map ngramsToRow $ splitEvery 4 <> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
$ reverse $ sortOn (snd . snd) <> ( map ngramsToRow $ splitEvery 4
$ zip (ngramsToText fdt (g ^. phylo_groupNgrams)) $ reverse $ sortOn (snd . snd)
$ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))} $ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
$ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
where where
-------------------------------------- --------------------------------------
ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row ngramsToRow :: [(Ngrams, (Double, Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) -> ngramsToRow ns =
H.LabelCell [ H.Align H.HLeft H.Cells $ map (\(n, (d, _)) ->
, dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns H.LabelCell [ H.Align H.HLeft
-------------------------------------- , dynamicToColor $ floor d] $ H.Text [H.Str $ fromStrict n]) ns
header :: H.Row --------------------------------------
header = header :: H.Row
H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")] header =
$ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel) H.Cells [ H.LabelCell [pickLabelColor $ floor <$> ((g ^. phylo_groupMeta) ! "dynamics")]
<> fromStrict " ( " $ H.Text [H.Str $ ((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
<> (pack $ show (fst $ g ^. phylo_groupPeriod)) <> fromStrict " ( "
<> fromStrict " , " <> (pack $ show (fst $ g ^. phylo_groupPeriod))
<> (pack $ show (snd $ g ^. phylo_groupPeriod)) <> fromStrict " , "
<> fromStrict " ) " <> (pack $ show (snd $ g ^. phylo_groupPeriod))
<> (pack $ show (getGroupId g))]] <> fromStrict " ) "
<> (pack $ show (getGroupId g))]]
-------------------------------------- --------------------------------------
branchToDotNode :: PhyloBranch -> Int -> Dot DotId branchToDotNode :: PhyloBranch -> Int -> Dot DotId
branchToDotNode b bId = branchToDotNode b bId =
node (branchIdToDotId $ b ^. branch_id) node (branchIdToDotId $ b ^. branch_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)] ( [ FillColor [toWColor CornSilk]
, FontName "Arial"
, FontSize 40
, Shape Egg
, Style [SItem Bold []]
, Label (toDotLabel $ b ^. branch_label) ]
<> (metaToAttr $ b ^. branch_meta) <> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch" <> [ toAttr "nodeType" "branch"
, toAttr "bId" (pack $ show bId) , toAttr "bId" (pack $ show bId)
...@@ -121,37 +129,42 @@ branchToDotNode b bId = ...@@ -121,37 +129,42 @@ branchToDotNode b bId =
periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
periodToDotNode prd prd' = periodToDotNode prd prd' =
node (periodIdToDotId prd) node (periodIdToDotId prd) $
([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))] [ Shape BoxShape
, FontSize 50
, Label $ toDotLabel $ Text.pack $ show (fst prd) <> " " <> show (snd prd) ]
<> [ toAttr "nodeType" "period" <> [ toAttr "nodeType" "period"
, toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd')) , toAttr "strFrom" $ fromStrict $ Text.pack $ show $ fst prd'
, toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd')) , toAttr "strTo" $ fromStrict $ Text.pack $ show $ snd prd'
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd)) , toAttr "from" $ fromStrict $ Text.pack $ show $ fst prd
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))]) , toAttr "to" $ fromStrict $ Text.pack $ show $ snd prd ]
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
groupToDotNode fdt g bId = groupToDotNode fdt g bId =
node (groupIdToDotId $ getGroupId g) node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)] ( [ FontName "Arial"
<> [ toAttr "nodeType" "group" , Shape Square
, toAttr "gid" (groupIdToDotId $ getGroupId g) , penWidth 4
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod)) , toLabel (groupToTable fdt g) ]
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod)) <> [ toAttr "nodeType" "group"
, toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod')) , toAttr "gid" (groupIdToDotId $ getGroupId g)
, toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod')) , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId)) , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "bId" (pack $ show bId) , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
, toAttr "support" (pack $ show (g ^. phylo_groupSupport)) , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "weight" (pack $ show (g ^. phylo_groupWeight)) , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources)) , toAttr "bId" (pack $ show bId)
, toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources)) , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams))) , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams))) , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics"))) , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence"))) , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels")) , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
]) , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
, toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
])
toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
...@@ -598,7 +611,13 @@ getGroupThr step g = ...@@ -598,7 +611,13 @@ getGroupThr step g =
breaks = (g ^. phylo_groupMeta) ! "breaks" breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup toAncestor :: Double
-> Map Int Double
-> PhyloSimilarity
-> Double
-> [PhyloGroup]
-> PhyloGroup
-> PhyloGroup
toAncestor nbDocs diago similarity step candidates ego = toAncestor nbDocs diago similarity step candidates ego =
let curr = ego ^. phylo_groupAncestors let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w)) in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
...@@ -607,7 +626,13 @@ toAncestor nbDocs diago similarity step candidates ego = ...@@ -607,7 +626,13 @@ toAncestor nbDocs diago similarity step candidates ego =
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates)) $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double -> Map Int Double -> PhyloSimilarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup] headsToAncestors :: Double
-> Map Int Double
-> PhyloSimilarity
-> Double
-> [PhyloGroup]
-> [PhyloGroup]
-> [PhyloGroup]
headsToAncestors nbDocs diago similarity step heads acc = headsToAncestors nbDocs diago similarity step heads acc =
if (null heads) if (null heads)
then acc then acc
......
...@@ -48,8 +48,8 @@ data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo} ...@@ -48,8 +48,8 @@ data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
toPhylo' (PhyloN phylo) = toPhylo' toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo toPhylo' (PhyloBase phylo) = toPhylo
-} -}
-- TODO an adaptative synchronic clustering with a slider -- TODO an adaptative synchronic clustering with a slider
...@@ -58,11 +58,11 @@ toPhylo :: Phylo -> Phylo ...@@ -58,11 +58,11 @@ toPhylo :: Phylo -> Phylo
toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink) $ toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1 if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)] then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phyloAncestors else phyloAncestors
where where
-------------------------------------- --------------------------------------
phyloAncestors :: Phylo phyloAncestors :: Phylo
phyloAncestors = phyloAncestors =
if (findAncestors $ getConfig phylowithoutLink) if (findAncestors $ getConfig phylowithoutLink)
then toHorizon phyloWithLinks then toHorizon phyloWithLinks
else phyloWithLinks else phyloWithLinks
...@@ -77,44 +77,44 @@ toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink ...@@ -77,44 +77,44 @@ toPhylo phylowithoutLink = traceToPhylo (phyloScale $ getConfig phylowithoutLink
----------------------------- -----------------------------
{- {-
-- create a square ladder -- create a square ladder
-} -}
squareLadder :: [Double] -> [Double] squareLadder :: [Double] -> [Double]
squareLadder ladder = List.map (\x -> x * x) ladder squareLadder ladder = List.map (\x -> x * x) ladder
{- {-
-- create an adaptative 'sea elevation' ladder -- create an adaptative 'sea elevation' ladder
-} -}
adaptSeaLadder :: Double -> Set Double -> Set Double -> [Double] adaptSeaLadder :: Double -> Set Double -> Set Double -> [Double]
adaptSeaLadder curr similarities ladder = adaptSeaLadder curr similarities ladder =
if curr <= 0 || Set.null similarities if curr <= 0 || Set.null similarities
then Set.toList ladder then Set.toList ladder
else else
let idx = ((Set.size similarities) `div` (floor curr)) - 1 let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2 -- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptSeaLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder) in adaptSeaLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{- {-
-- create a constante 'sea elevation' ladder -- create a constante 'sea elevation' ladder
-} -}
constSeaLadder :: Double -> Double -> Set Double -> [Double] constSeaLadder :: Double -> Double -> Set Double -> [Double]
constSeaLadder curr step ladder = constSeaLadder curr step ladder =
if curr > 1 if curr > 1
then Set.toList ladder then Set.toList ladder
else constSeaLadder (curr + step) step (Set.insert curr ladder) else constSeaLadder (curr + step) step (Set.insert curr ladder)
{- {-
-- create an evolving 'sea elevation' ladder based on estimated & local quality maxima -- create an evolving 'sea elevation' ladder based on estimated & local quality maxima
-} -}
evolvSeaLadder :: Double -> Double -> Map Int Double -> Set Double -> [((PhyloGroup,PhyloGroup),Double)] -> [Double] evolvSeaLadder :: Double -> Double -> Map Int Double -> Set Double -> [((PhyloGroup,PhyloGroup),Double)] -> [Double]
evolvSeaLadder nbFdt lambda freq similarities graph = map snd evolvSeaLadder nbFdt lambda freq similarities graph = map snd
$ filter fst $ filter fst
$ zip maxima (map fst qua') $ zip maxima (map fst qua')
-- 3) find the corresponding measures of similarity and create the ladder -- 3) find the corresponding measures of similarity and create the ladder
where where
...@@ -125,7 +125,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -125,7 +125,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-------- --------
-- 1.2) -- 1.2)
qua' :: [(Double,Double)] qua' :: [(Double,Double)]
qua' = foldl (\acc (s,q) -> qua' = foldl (\acc (s,q) ->
if length acc == 0 if length acc == 0
then [(s,q)] then [(s,q)]
else if (snd (List.last acc)) == q else if (snd (List.last acc)) == q
...@@ -135,7 +135,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -135,7 +135,7 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-------- --------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double] qua :: [Double]
qua = map (\thr -> qua = map (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph let edges = filter (\edge -> snd edge >= thr) graph
nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges branches = toRelatedComponents nodes edges
...@@ -143,46 +143,46 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -143,46 +143,46 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
) $ (Set.toList similarities) ) $ (Set.toList similarities)
{- {-
-- find a similarity ladder regarding the "sea elevation" strategy -- find a similarity ladder regarding the "sea elevation" strategy
-} -}
findSeaLadder :: Phylo -> Phylo findSeaLadder :: Phylo -> Phylo
findSeaLadder phylo = case getSeaElevation phylo of findSeaLadder phylo = case getSeaElevation phylo of
Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty) Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty)
Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty) Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty)
Evolving _ -> let ladder = evolvSeaLadder Evolving _ -> let ladder = evolvSeaLadder
(fromIntegral $ Vector.length $ getRoots phylo) (fromIntegral $ Vector.length $ getRoots phylo)
(getLevel phylo) (getLevel phylo)
(getRootsFreq phylo) (getRootsFreq phylo)
similarities simGraph similarities simGraph
in phylo & phylo_seaLadder .~ (if length ladder > 0 in phylo & phylo_seaLadder .~ (if length ladder > 0
then ladder then ladder
-- if we don't find any local maxima with the evolving strategy -- if we don't find any local maxima with the evolving strategy
else constSeaLadder 0.1 0.1 Set.empty) else constSeaLadder 0.1 0.1 Set.empty)
where where
-------- --------
-- 2) extract the values of the kinship links -- 2) extract the values of the kinship links
similarities :: Set Double similarities :: Set Double
similarities = Set.fromList $ sort $ map snd simGraph similarities = Set.fromList $ sort $ map snd simGraph
-------- --------
-- 1) we process an initial calculation of the kinship links -- 1) we process an initial calculation of the kinship links
-- this initial calculation is used to estimate the real sea ladder -- this initial calculation is used to estimate the real sea ladder
simGraph :: [((PhyloGroup,PhyloGroup),Double)] simGraph :: [((PhyloGroup,PhyloGroup),Double)]
simGraph = foldl' (\acc period -> simGraph = foldl' (\acc period ->
-- 1.1) process period by period -- 1.1) process period by period
let sources = getGroupsFromScalePeriods 1 [period] phylo let sources = getGroupsFromScalePeriods 1 [period] phylo
next = getNextPeriods ToParents 3 period (keys $ phylo ^. phylo_periods) next = getNextPeriods ToParents 3 period (keys $ phylo ^. phylo_periods)
targets = getGroupsFromScalePeriods 1 next phylo targets = getGroupsFromScalePeriods 1 next phylo
docs = filterDocs (getDocsByDate phylo) ([period] ++ next) docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next) diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel -- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = map (\source -> pairs = map (\source ->
let candidates = filter (\target -> (> 2) $ length let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target -> in map (\target ->
let nbDocs = (sum . elems) let nbDocs = (sum . elems)
$ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)]) $ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
diago = reduceDiagos diago = reduceDiagos
$ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)]) $ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target)) in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target))
) candidates ) candidates
...@@ -202,15 +202,15 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -202,15 +202,15 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
let pId = phyloLvl ^. phylo_scalePeriod let pId = phyloLvl ^. phylo_scalePeriod
pId' = phyloLvl ^. phylo_scalePeriodStr pId' = phyloLvl ^. phylo_scalePeriodStr
phyloCUnit = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj -> & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups) , f obj pId pId' lvl (length groups)
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])) (elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
phylo phylo
clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup
...@@ -227,16 +227,16 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" ...@@ -227,16 +227,16 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
----------------------- -----------------------
-- | To Phylo Step | -- -- | To Phylo Step | --
----------------------- -----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text) indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
indexDates' m = map (\docs -> indexDates' m = map (\docs ->
let ds = map (\d -> date' d) docs let ds = map (\d -> date' d) docs
f = if (null ds) f = if (null ds)
then "" then ""
else toFstDate ds else toFstDate ds
l = if (null ds) l = if (null ds)
then "" then ""
else toLstDate ds else toLstDate ds
in (f,l)) m in (f,l)) m
...@@ -250,8 +250,8 @@ joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap p ...@@ -250,8 +250,8 @@ joinRoots phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap p
rootsMap :: Map Int [PhyloGroupId] rootsMap :: Map Int [PhyloGroupId]
rootsMap = fromListWith (++) rootsMap = fromListWith (++)
$ concat -- flatten $ concat -- flatten
$ map (\g -> $ map (\g ->
map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g) map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
$ getGroupsFromScale 1 phylo $ getGroupsFromScale 1 phylo
...@@ -263,10 +263,9 @@ maybeDefaultParams phylo = if (defaultMode (getConfig phylo)) ...@@ -263,10 +263,9 @@ maybeDefaultParams phylo = if (defaultMode (getConfig phylo))
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering -- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink
toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
toPhyloWithoutLink docs conf = joinRoots toPhyloWithoutLink docs conf = joinRoots
$ findSeaLadder $ findSeaLadder
$ maybeDefaultParams $ maybeDefaultParams
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where where
...@@ -306,23 +305,23 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= ...@@ -306,23 +305,23 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
clq' = clq `using` parList rdeepseq clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs into a time map of Fis with some filters -- | To transform a time map of docs into a time map of Fis with some filters
toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering] toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -} {- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize) $ filterClique True s' (filterCliqueBySize)
{- \$ traceFis "Filtered by support" -} {- \$ traceFis "Filtered by support" -}
...@@ -332,33 +331,33 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -332,33 +331,33 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
MaxClique s _ _ -> filterClique True s (filterCliqueBySize) MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
seriesOfClustering seriesOfClustering
where where
-------------------------------------- --------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering] seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = map (\(prd,docs) -> let fis = map (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList _ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs) $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
) )
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
MaxClique _ thr filterType -> MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) -> let mcl = map (\(prd,docs) ->
let cooc = map round let cooc = map round
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc)) in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq mcl' = mcl `using` parList rdeepseq
in fromList mcl' in fromList mcl'
-------------------------------------- --------------------------------------
-- dev viz graph maxClique getMaxClique -- dev viz graph maxClique getMaxClique
...@@ -368,9 +367,9 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -368,9 +367,9 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
-------------------- --------------------
-- To transform the docs into a time map of coocurency matrix -- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt = docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
...@@ -389,8 +388,8 @@ docsToTimeScaleCooc docs fdt = ...@@ -389,8 +388,8 @@ docsToTimeScaleCooc docs fdt =
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc] groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc = groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs)) if ((null prds) || (null docs))
then acc then acc
else else
let prd = head' "groupBy" prds let prd = head' "groupBy" prds
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc) in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
...@@ -402,7 +401,7 @@ groupDocsByPeriod' f pds docs = ...@@ -402,7 +401,7 @@ groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n") in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods' $ fromList $ zip pds periods'
where where
-------------------------------------- --------------------------------------
...@@ -419,14 +418,14 @@ groupDocsByPeriod f pds es = ...@@ -419,14 +418,14 @@ groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n") in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods' $ fromList $ zip pds periods'
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) = inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h fst $ partition (\d -> f' d >= start && f' d <= end) h
-------------------------------------- --------------------------------------
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
...@@ -434,7 +433,7 @@ docsToTermFreq docs fdt = ...@@ -434,7 +433,7 @@ docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs let nbDocs = fromIntegral $ length docs
freqs = map (/(nbDocs)) freqs = map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
...@@ -442,39 +441,39 @@ docsToTermFreq docs fdt = ...@@ -442,39 +441,39 @@ docsToTermFreq docs fdt =
docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermCount docs roots = fromList docsToTermCount docs roots = fromList
$ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt = docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs let last = take n $ reverse $ sort $ map date docs
nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
freqs = map (/(nbDocs)) freqs = map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
-- To count the number of docs by unit of time -- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1 time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n") in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs' $ unionWith (+) time docs'
initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
initPhyloScales lvlMax pId = initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
setDefault :: PhyloConfig -> PhyloConfig setDefault :: PhyloConfig -> PhyloConfig
setDefault conf = conf { setDefault conf = conf {
phyloScale = 2, phyloScale = 2,
similarity = WeightedLogJaccard 0.5 2, similarity = WeightedLogJaccard 0.5 2,
findAncestors = True, findAncestors = True,
...@@ -491,7 +490,7 @@ setDefault conf = conf { ...@@ -491,7 +490,7 @@ setDefault conf = conf {
-- Init the basic elements of a Phylo -- Init the basic elements of a Phylo
-- --
initPhylo :: [Document] -> PhyloConfig -> Phylo initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf = initPhylo docs conf =
let roots = Vector.fromList $ nub $ concat $ map text docs let roots = Vector.fromList $ nub $ concat $ map text docs
foundations = PhyloFoundations roots empty foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
...@@ -499,12 +498,12 @@ initPhylo docs conf = ...@@ -499,12 +498,12 @@ initPhylo docs conf =
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermCount docs (foundations ^. foundations_roots)) (docsToTermCount docs (foundations ^. foundations_roots))
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
params = if (defaultMode conf) params = if (defaultMode conf)
then defaultPhyloParam { _phyloParam_config = setDefault conf } then defaultPhyloParam { _phyloParam_config = setDefault conf }
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
docsSources docsSources
docsCounts docsCounts
......
...@@ -172,11 +172,11 @@ toLstDate ds = snd ...@@ -172,11 +172,11 @@ toLstDate ds = snd
getTimeScale :: Phylo -> [Char] getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of getTimeScale p = case (timeUnit $ getConfig p) of
Epoch _ _ _ -> "epoch" Epoch {} -> "epoch"
Year _ _ _ -> "year" Year {} -> "year"
Month _ _ _ -> "month" Month {} -> "month"
Week _ _ _ -> "week" Week {} -> "week"
Day _ _ _ -> "day" Day {} -> "day"
-- | Get a regular & ascendante timeScale from a given list of dates -- | Get a regular & ascendante timeScale from a given list of dates
...@@ -188,27 +188,27 @@ toTimeScale dates step = ...@@ -188,27 +188,27 @@ toTimeScale dates step =
getTimeStep :: TimeUnit -> Int getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of getTimeStep time = case time of
Epoch _ s _ -> s Epoch { .. } -> _epoch_step
Year _ s _ -> s Year { .. } -> _year_step
Month _ s _ -> s Month { .. } -> _month_step
Week _ s _ -> s Week { .. } -> _week_step
Day _ s _ -> s Day { .. } -> _day_step
getTimePeriod :: TimeUnit -> Int getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of getTimePeriod time = case time of
Epoch p _ _ -> p Epoch { .. } -> _epoch_period
Year p _ _ -> p Year { .. } -> _year_period
Month p _ _ -> p Month { .. } -> _month_period
Week p _ _ -> p Week { .. } -> _week_period
Day p _ _ -> p Day { .. } -> _day_period
getTimeFrame :: TimeUnit -> Int getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of getTimeFrame time = case time of
Epoch _ _ f -> f Epoch { .. } -> _epoch_matchingFrame
Year _ _ f -> f Year { .. } -> _year_matchingFrame
Month _ _ f -> f Month { .. } -> _month_matchingFrame
Week _ _ f -> f Week { .. } -> _week_matchingFrame
Day _ _ f -> f Day { .. } -> _day_matchingFrame
------------- -------------
-- | Fis | -- -- | Fis | --
...@@ -220,7 +220,7 @@ isNested :: Eq a => [a] -> [a] -> Bool ...@@ -220,7 +220,7 @@ isNested :: Eq a => [a] -> [a] -> Bool
isNested l l' isNested l l'
| null l' = True | null l' = True
| length l' > length l = False | length l' > length l = False
| (union l l') == l = True | union l l' == l = True
| otherwise = False | otherwise = False
...@@ -251,8 +251,8 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " ...@@ -251,8 +251,8 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> "
traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n" <> "Support : " <> traceSupport mFis <> "\n"
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis <> "Nb Ngrams : " <> traceClique mFis <> "\n" ) mFis
---------------- ----------------
...@@ -323,7 +323,7 @@ findMaxima lst = map (hasMax) $ toChunk 3 lst ...@@ -323,7 +323,7 @@ findMaxima lst = map (hasMax) $ toChunk 3 lst
where where
------ ------
hasMax :: [(Double,Double)] -> Bool hasMax :: [(Double,Double)] -> Bool
hasMax chunk = hasMax chunk =
if (length chunk) /= 3 if (length chunk) /= 3
then False then False
else (snd(chunk !! 0) < snd(chunk !! 1)) && (snd(chunk !! 2) < snd(chunk !! 1)) else (snd(chunk !! 0) < snd(chunk !! 1)) && (snd(chunk !! 2) < snd(chunk !! 1))
...@@ -331,7 +331,7 @@ findMaxima lst = map (hasMax) $ toChunk 3 lst ...@@ -331,7 +331,7 @@ findMaxima lst = map (hasMax) $ toChunk 3 lst
-- | split a list into chunks of size n -- | split a list into chunks of size n
toChunk :: Int -> [a] -> [[a]] toChunk :: Int -> [a] -> [[a]]
toChunk n = takeWhile ((== n) . length) . transpose . take n . iterate tail toChunk n = takeWhile ((== n) . length) . transpose . take n . iterate tail
-- | To compute the average degree from a cooc matrix -- | To compute the average degree from a cooc matrix
...@@ -343,7 +343,7 @@ toAverageDegree cooc roots = 2 * (fromIntegral $ Map.size cooc) / (fromIntegral ...@@ -343,7 +343,7 @@ toAverageDegree cooc roots = 2 * (fromIntegral $ Map.size cooc) / (fromIntegral
-- | Use the giant component regime to estimate the default level -- | Use the giant component regime to estimate the default level
-- http://networksciencebook.com/chapter/3#networks-supercritical -- http://networksciencebook.com/chapter/3#networks-supercritical
regimeToDefaultLevel :: Cooc -> Vector Ngrams -> Double regimeToDefaultLevel :: Cooc -> Vector Ngrams -> Double
regimeToDefaultLevel cooc roots regimeToDefaultLevel cooc roots
| avg == 0 = 1 | avg == 0 = 1
| avg < 1 = avg * 0.6 | avg < 1 = avg * 0.6
| avg == 1 = 0.6 | avg == 1 = 0.6
...@@ -356,26 +356,26 @@ regimeToDefaultLevel cooc roots ...@@ -356,26 +356,26 @@ regimeToDefaultLevel cooc roots
lnN = log (fromIntegral $ Vector.length roots) lnN = log (fromIntegral $ Vector.length roots)
coocToConfidence :: Phylo -> Cooc coocToConfidence :: Phylo -> Cooc
coocToConfidence phylo = coocToConfidence phylo =
let count = getRootsCount phylo let count = getRootsCount phylo
cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty
$ elems $ getCoocByDate phylo $ elems $ getCoocByDate phylo
in Map.mapWithKey (\(a,b) w -> confidence a b w count) cooc in Map.mapWithKey (\(a,b) w -> confidence a b w count) cooc
where where
---- ----
-- confidence -- confidence
confidence :: Int -> Int -> Double -> Map Int Double -> Double confidence :: Int -> Int -> Double -> Map Int Double -> Double
confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)] confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)]
sumtest :: [Int] -> [Int] -> Int sumtest :: [Int] -> [Int] -> Int
sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2) sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2)
findDefaultLevel :: Phylo -> Phylo findDefaultLevel :: Phylo -> Phylo
findDefaultLevel phylo = findDefaultLevel phylo =
let confidence = Map.filterWithKey (\(a,b) _ -> a /= b) let confidence = Map.filterWithKey (\(a,b) _ -> a /= b)
$ Map.filter (> 0.01) $ Map.filter (> 0.01)
$ coocToConfidence phylo $ coocToConfidence phylo
roots = getRoots phylo roots = getRoots phylo
level = regimeToDefaultLevel confidence roots level = regimeToDefaultLevel confidence roots
...@@ -488,7 +488,7 @@ getPhyloSeaRiseSteps :: Phylo -> Double ...@@ -488,7 +488,7 @@ getPhyloSeaRiseSteps :: Phylo -> Double
getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
Constante _ s -> s Constante _ s -> s
Adaptative s -> s Adaptative s -> s
Evolving _ -> 0.1 Evolving _ -> 0.1
getConfig :: Phylo -> PhyloConfig getConfig :: Phylo -> PhyloConfig
...@@ -501,10 +501,10 @@ getLadder :: Phylo -> [Double] ...@@ -501,10 +501,10 @@ getLadder :: Phylo -> [Double]
getLadder phylo = phylo ^. phylo_seaLadder getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate :: Phylo -> Map Date Cooc getCoocByDate :: Phylo -> Map Date Cooc
getCoocByDate phylo = coocByDate (phylo ^. phylo_counts) getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
getDocsByDate :: Phylo -> Map Date Double getDocsByDate :: Phylo -> Map Date Double
getDocsByDate phylo = docsByDate (phylo ^. phylo_counts) getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
getRootsCount :: Phylo -> Map Int Double getRootsCount :: Phylo -> Map Int Double
getRootsCount phylo = rootsCount (phylo ^. phylo_counts) getRootsCount phylo = rootsCount (phylo ^. phylo_counts)
...@@ -599,10 +599,10 @@ updatePeriods periods' phylo = ...@@ -599,10 +599,10 @@ updatePeriods periods' phylo =
) phylo ) phylo
updateQuality :: Double -> Phylo -> Phylo updateQuality :: Double -> Phylo -> Phylo
updateQuality quality phylo = phylo { _phylo_quality = quality } updateQuality quality phylo = phylo { _phylo_quality = quality }
updateLevel :: Double -> Phylo -> Phylo updateLevel :: Double -> Phylo -> Phylo
updateLevel level phylo = phylo { _phylo_level = level } updateLevel level phylo = phylo { _phylo_level = level }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
...@@ -697,7 +697,7 @@ getMinSharedNgrams :: PhyloSimilarity -> Int ...@@ -697,7 +697,7 @@ getMinSharedNgrams :: PhyloSimilarity -> Int
getMinSharedNgrams proxi = case proxi of getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m WeightedLogSim _ m -> m
Hamming _ _ -> undefined Hamming _ _ -> undefined
---------------- ----------------
-- | Branch | -- -- | Branch | --
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment