Commit 92b4221b authored by qlobbe's avatar qlobbe

add branching

parent d3097207
...@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList ...@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType -- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusParser -> Int -> FilePath -> IO ([(Int,Text)]) fileToCorpus :: CorpusParser -> FilePath -> IO ([(Int,Text)])
fileToCorpus parser limit path = case parser of fileToCorpus parser path = case parser of
Wos -> wosToCorpus limit path Wos limit -> wosToCorpus limit path
Csv -> csvToCorpus limit path Csv limit -> csvToCorpus limit path
-- | To parse a file into a list of Document -- | To parse a file into a list of Document
fileToDocs :: CorpusParser -> Int -> FilePath -> TermList -> IO [Document] fileToDocs :: CorpusParser -> FilePath -> TermList -> IO [Document]
fileToDocs parser limit path lst = do fileToDocs parser path lst = do
corpus <- fileToCorpus parser limit path corpus <- fileToCorpus parser path
let patterns = buildPatterns lst let patterns = buildPatterns lst
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
...@@ -162,7 +162,7 @@ main = do ...@@ -162,7 +162,7 @@ main = do
printIOMsg "Parse the corpus" printIOMsg "Parse the corpus"
mapList <- csvGraphTermList (listPath config) mapList <- csvGraphTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusLimit config) (corpusPath config) mapList corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus") printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the Phylo" printIOMsg "Reconstruct the Phylo"
......
...@@ -50,46 +50,63 @@ import Control.Lens (makeLenses) ...@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
---------------- ----------------
data CorpusParser = Wos | Csv deriving (Show,Generic,Eq) data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq)
data Proximity = WeightedLogJaccard {_sensibility :: Double} data Proximity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double }
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data TimeUnit =
Year
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data ContextualUnit =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
deriving (Show,Generic,Eq)
data Config = data Config =
Config { corpusPath :: FilePath Config { corpusPath :: FilePath
, listPath :: FilePath , listPath :: FilePath
, outputPath :: FilePath , outputPath :: FilePath
, corpusParser :: CorpusParser , corpusParser :: CorpusParser
, corpusLimit :: Int
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloQuality :: Double
, phyloProximity :: Proximity , phyloProximity :: Proximity
, timeUnit :: Int , timeUnit :: TimeUnit
, maxTimeMatch :: Int , contextualUnit :: ContextualUnit
, timePeriod :: Int
, timeStep :: Int
, fisSupport :: Int
, fisSize :: Int
, branchSize :: Int , branchSize :: Int
} deriving (Show,Generic,Eq) } deriving (Show,Generic,Eq)
defaultConfig :: Config defaultConfig :: Config
defaultConfig = defaultConfig =
Config { corpusPath = "" Config { corpusPath = ""
, listPath = "" , listPath = ""
, outputPath = "" , outputPath = ""
, corpusParser = Csv , corpusParser = Csv 1000
, corpusLimit = 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 , phyloQuality = 0.5
, timeUnit = 1 , phyloProximity = WeightedLogJaccard 10 0 0.05
, maxTimeMatch = 5 , timeUnit = Year 3 1 5
, timePeriod = 3 , contextualUnit = Fis 2 4
, timeStep = 1
, fisSupport = 2
, fisSize = 4
, branchSize = 3 , branchSize = 3
} }
...@@ -99,6 +116,10 @@ instance FromJSON CorpusParser ...@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON ContextualUnit
instance ToJSON ContextualUnit
-- | Software parameters -- | Software parameters
...@@ -237,7 +258,7 @@ data PhyloGroup = ...@@ -237,7 +258,7 @@ data PhyloGroup =
, _phylo_groupLevelChilds :: [Pointer] , _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupBreakPointer :: Maybe Pointer , _phylo_groupGhostPointers :: [Pointer]
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis ...@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
---------------- ----------------
makeLenses ''Config makeLenses ''Config
makeLenses ''Proximity
makeLenses ''ContextualUnit
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloFis makeLenses ''PhyloFis
makeLenses ''Phylo makeLenses ''Phylo
......
...@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase ...@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docsByPeriods (fisSupport config) (fisSize config) phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
...@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config ...@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc :: Map Date Cooc phyloCooc :: Map Date Cooc
phyloCooc = docsToCoocByYear docs (foundations ^. foundations_roots) config phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) config
periods :: [(Date,Date)] periods :: [(Date,Date)]
periods = toPeriods (sort $ nub $ map date docs) (timePeriod config) (timeStep config) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit config) (getTimeStep $ timeUnit config)
nbDocsByYear :: Map Date Double nbDocsByYear :: Map Date Double
nbDocsByYear = nbDocsByTime docs (timeUnit config) nbDocsByYear = docsToTimeScaleNb docs
config :: Config config :: Config
config = config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, branchSize = 0 , branchSize = 0
, fisSupport = 0 , contextualUnit = Fis 0 0 }
, fisSize = 0 }
docs :: [Document] docs :: [Document]
......
...@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs = ...@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[]) (1,[])
[] [] [] [] [] [] [] [] []
Nothing
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
...@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase ...@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase) phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase)
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
...@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs = ...@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix -- | To transform the docs into a time map of coocurency matrix
docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
docsToCoocByYear docs fdt conf = docsToTimeScaleCooc docs fdt conf =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList mCooc' = fromList
$ map (\t -> (t,empty)) $ map (\t -> (t,empty))
$ toTimeScale (map date docs) (timeUnit conf) $ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n") in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc' $ unionWith sumCooc mCooc mCooc'
...@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es = ...@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
-------------------------------------- --------------------------------------
-- | To count the number of docs by unit of time (like a year) -- | To count the number of docs by unit of time
nbDocsByTime :: [Document] -> Int -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
nbDocsByTime docs step = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') step time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n") in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs' $ unionWith (+) time docs'
...@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo ...@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf = toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
params = defaultPhyloParam { _phyloParam_config = conf } params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (timePeriod conf) (timeStep conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
(docsToCoocByYear docs (foundations ^. foundations_roots) conf) (docsToTimeScaleCooc docs (foundations ^. foundations_roots) conf)
(nbDocsByTime docs $ timeUnit conf) (docsToTimeScaleNb docs)
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
...@@ -91,6 +91,18 @@ toTimeScale dates step = ...@@ -91,6 +91,18 @@ toTimeScale dates step =
in [start, (start + step) .. end] in [start, (start + step) .. end]
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Year _ s _ -> s
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Year p _ _ -> p
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Year _ _ f -> f
------------- -------------
-- | Fis | -- -- | Fis | --
------------- -------------
...@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l ...@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of
Fis s _ -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
getFisSize :: ContextualUnit -> Int
getFisSize unit = case unit of
Fis _ s -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
-------------- --------------
-- | Cooc | -- -- | Cooc | --
-------------- --------------
...@@ -223,6 +251,7 @@ updatePhyloGroups lvl m phylo = ...@@ -223,6 +251,7 @@ updatePhyloGroups lvl m phylo =
-- | Pointers | -- -- | Pointers | --
------------------ ------------------
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link] pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
...@@ -230,3 +259,23 @@ mergeLinks :: [Link] -> [Link] -> [Link] ...@@ -230,3 +259,23 @@ mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks toChilds toParents = mergeLinks toChilds toParents =
let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
in toList $ unionWith max (fromList toParents) toChilds' in toList $ unionWith max (fromList toParents) toChilds'
-------------------
-- | Proximity | --
-------------------
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
\ No newline at end of file
...@@ -19,11 +19,19 @@ import Gargantext.Prelude ...@@ -19,11 +19,19 @@ import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat)
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
-------------------- --------------------
relatedComponents :: [PhyloGroup] -> [[PhyloGroup]] relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents groups = undefined relatedComponents graphs = foldl' (\mem groups ->
\ No newline at end of file if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
\ No newline at end of file
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub, union) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub, union)
import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith) import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith, member, (!))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
...@@ -25,6 +25,8 @@ import Gargantext.Viz.Phylo.SynchronicClustering ...@@ -25,6 +25,8 @@ import Gargantext.Viz.Phylo.SynchronicClustering
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import qualified Data.Set as Set
------------------- -------------------
-- | Proximity | -- -- | Proximity | --
...@@ -73,7 +75,7 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams' ...@@ -73,7 +75,7 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
-- | To choose a proximity function -- | To choose a proximity function
pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard sens -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams' WeightedLogJaccard sens _ _ -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
Hamming -> undefined Hamming -> undefined
...@@ -96,23 +98,20 @@ toProximity docs proximity group target target' = ...@@ -96,23 +98,20 @@ toProximity docs proximity group target target' =
-- | Find pairs of valuable candidates to be matched -- | Find pairs of valuable candidates to be matched
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> Proximity -> PhyloGroup -> [(PhyloGroup,PhyloGroup)] makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods thr docs proximity group = case null periods of makePairs candidates periods docs group = case null periods of
True -> [] True -> []
-- | at least on of the pair candidates should be from the last added period -- | at least on of the pair candidates should be from the last added period
False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods) False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
|| (inLastPeriod cdt' periods)) || (inLastPeriod cdt' periods))
$ listToKeys $ listToKeys candidates
-- | remove poor candidates from previous periods
$ filter (\cdt -> (inLastPeriod cdt periods)
|| ((toProximity (reframeDocs docs periods) proximity group group cdt) >= thr)) candidates
where where
inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds) inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil thr docs proxi group = case pointers of phyloGroupMatching candidates fil proxi docs group = case pointers of
Nothing -> addPointers group fil TemporalPointer [] Nothing -> addPointers group fil TemporalPointer []
Just pts -> addPointers group fil TemporalPointer Just pts -> addPointers group fil TemporalPointer
$ head' "phyloGroupMatching" $ head' "phyloGroupMatching"
...@@ -126,18 +125,55 @@ phyloGroupMatching candidates fil thr docs proxi group = case pointers of ...@@ -126,18 +125,55 @@ phyloGroupMatching candidates fil thr docs proxi group = case pointers of
-- | for each time frame, process the proximity on relevant pairs of targeted groups -- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
pairs = makePairs (concat groups) periods thr docs proxi group pairs = makePairs (concat groups) periods docs group
in acc ++ ( filter (\(_,proximity) -> proximity >= thr ) in acc ++ ( concat
$ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
let proximity = toProximity (reframeDocs docs periods) proxi group c c' let proximity = toProximity (filterDocs docs periods) proxi group c c'
in if (c == c') in if (c == c')
then [(getGroupId c,proximity)] then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs) else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
) [] ) []
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...] -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$ inits candidates $ inits candidates
--------------------------------------
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
------------------
-- | Pointers | --
------------------
-- ghostHunter :: [[PhyloGroup]] -> [[PhyloGroup]]
-- ghostHunter branches =
-- map (\branch ->
-- -- | il manque une référence au group source de chaque pointer
-- let pointers = elems $ fromList
-- $ map (\pt -> (groupIds ! (fst pt),pt))
-- $ filter (\pt -> member (fst pt) groupIds) $ concat $ map (\g -> g ^. phylo_groupGhostPointers) branch
-- in undefined
-- ) branches
-- where
-- groupIds :: Map PhyloGroupId Int
-- groupIds = fromList $ map (\g -> (getGroupId g, last' "ghostHunter" $ snd $ g ^. phylo_groupBranchId)) $ concat branches
-- --------------------------------------
-- selectBest :: [Pointers] -> [Pointers]
-- se
filterPointers :: Double -> [PhyloGroup] -> [PhyloGroup]
filterPointers thr groups =
map (\group ->
let ghosts = filter (\(_,w) -> w < thr) $ group ^. phylo_groupPeriodParents
in group & phylo_groupPeriodParents %~ (filter (\(_,w) -> w >= thr))
& phylo_groupPeriodChilds %~ (filter (\(_,w) -> w >= thr))
& phylo_groupGhostPointers %~ (++ ghosts)
) groups
----------------------------- -----------------------------
...@@ -174,14 +210,26 @@ toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])] ...@@ -174,14 +210,26 @@ toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
toBranchQuality branches = undefined toBranchQuality branches = undefined
reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
reframeDocs docs periods = restrictKeys docs $ periodsToYears periods groupsToBranches groups =
-- | run the related component algorithm
let graph = zip [1..]
$ relatedComponents
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- | update each group's branch id
in map (\(bId,ids) ->
map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList ids)
) graph
-- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId -- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId
adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup] adaptativeMatching :: Proximity -> Double -> Double -> [PhyloGroup] -> [PhyloGroup]
adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candidates periods = adaptativeMatching proximity thr thrQua groups =
-- | check if we should break some of the new branches or not -- | check if we should break some of the new branches or not
case shouldBreak thrQua branches' of case shouldBreak thrQua branches' of
True -> concat $ map (\(s,b) -> True -> concat $ map (\(s,b) ->
...@@ -190,12 +238,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida ...@@ -190,12 +238,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
then b then b
-- | we break the branch using an increased temporal matching threshold -- | we break the branch using an increased temporal matching threshold
else let nextGroups = undefined else let nextGroups = undefined
nextCandidates = undefined in adaptativeMatching proximity (thr + (getThresholdStep proximity)) thrQua nextGroups
nextPeriods = undefined
in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua
(reframeDocs docs nextPeriods)
proximity
nextGroups nextCandidates nextPeriods
) branches' ) branches'
-- | the quality of all the new branches is sufficient -- | the quality of all the new branches is sufficient
False -> concat branches False -> concat branches
...@@ -205,25 +248,41 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida ...@@ -205,25 +248,41 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
branches' = toBranchQuality branches branches' = toBranchQuality branches
-- | 2) group the new groups into branches -- | 2) group the new groups into branches
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = relatedComponents groups' branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups'
-- | 1) connect each group to its parents and childs -- | 1) filter the pointers of each groups regarding the current state of the quality threshold
groups' :: [PhyloGroup] groups' :: [PhyloGroup]
groups' = map (\group -> groups' = filterPointers thr groups
let childs = getCandidates ToChilds group
(getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) candidates
parents = getCandidates ToParents group
(getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates
-- | match the group to its possible childs then parents
in phyloGroupMatching parents ToParents thrMatch docs proximity
$ phyloGroupMatching childs ToChilds thrMatch docs proximity group
) groups
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
temporalMatching phylo = temporalMatching phylo = updatePhyloGroups 1 branches phylo
let branches = fromList $ map (\g -> (getGroupId g, g)) where
$ adaptativeMatching (maxTimeMatch $ getConfig phylo) 0 0 0 -- | 4) find the ghost links and postprocess the branches
(phylo ^. phylo_timeDocs) branches' :: Map PhyloGroupId PhyloGroup
(phyloProximity $ getConfig phylo) branches' = undefined
(getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo) -- | 3) run the adaptative matching to find the best repartition among branches
in updatePhyloGroups 1 branches phylo branches :: Map PhyloGroupId PhyloGroup
\ No newline at end of file branches = fromList
$ map (\g -> (getGroupId g, g))
$ adaptativeMatching proximity (getThresholdInit proximity) (phyloQuality $ getConfig phylo) groups'
-- | 2) for each group process an initial temporal Matching
groups' :: [PhyloGroup]
groups' =
let maxTime = getTimeFrame $ timeUnit $ getConfig phylo
periods = getPeriodIds phylo
docs = phylo ^. phylo_timeDocs
--------------------------------------
in map (\group ->
let childs = getCandidates ToChilds group
(getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) groups
parents = getCandidates ToParents group
(getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) groups
in phyloGroupMatching parents ToParents proximity docs
$ phyloGroupMatching childs ToChilds proximity docs group
) groups
-- | 1) start with all the groups from a given level
groups :: [PhyloGroup]
groups = getGroupsFromLevel 1 phylo
--------------------------------------
proximity :: Proximity
proximity = phyloProximity $ getConfig phylo
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment