Commit 99b5de7d authored by qlobbe's avatar qlobbe

ready for quality score

parent 92b4221b
Pipeline #554 failed with stage
...@@ -87,7 +87,6 @@ data Config = ...@@ -87,7 +87,6 @@ data Config =
, corpusParser :: CorpusParser , corpusParser :: CorpusParser
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloQuality :: Double
, phyloProximity :: Proximity , phyloProximity :: Proximity
, timeUnit :: TimeUnit , timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit , contextualUnit :: ContextualUnit
...@@ -103,7 +102,6 @@ defaultConfig = ...@@ -103,7 +102,6 @@ defaultConfig =
, corpusParser = Csv 1000 , corpusParser = Csv 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloQuality = 0.5
, phyloProximity = WeightedLogJaccard 10 0 0.05 , phyloProximity = WeightedLogJaccard 10 0 0.05
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4 , contextualUnit = Fis 2 4
......
...@@ -66,7 +66,7 @@ phyloBase = toPhyloBase docs mapList config ...@@ -66,7 +66,7 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc :: Map Date Cooc phyloCooc :: Map Date Cooc
phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) config phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots)
periods :: [(Date,Date)] periods :: [(Date,Date)]
......
...@@ -173,8 +173,8 @@ ngramsToCooc ngrams coocs = ...@@ -173,8 +173,8 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix -- | To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt conf = docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
...@@ -229,7 +229,7 @@ toPhyloBase docs lst conf = ...@@ -229,7 +229,7 @@ toPhyloBase docs lst conf =
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
(docsToTimeScaleCooc docs (foundations ^. foundations_roots) conf) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
...@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where ...@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn) import Data.List (sort, concat, null, union, (++), tails, sortOn)
import Data.Set (Set, size) import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), toList) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!))
import Data.String (String) import Data.String (String)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -156,12 +156,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l ...@@ -156,12 +156,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
getFisSupport :: ContextualUnit -> Int getFisSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of getFisSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support") -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
getFisSize :: ContextualUnit -> Int getFisSize :: ContextualUnit -> Int
getFisSize unit = case unit of getFisSize unit = case unit of
Fis _ s -> s Fis _ s -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size") -- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
-------------- --------------
...@@ -255,10 +255,10 @@ updatePhyloGroups lvl m phylo = ...@@ -255,10 +255,10 @@ updatePhyloGroups lvl m phylo =
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link] pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
mergeLinks :: [Link] -> [Link] -> [Link] -- mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks toChilds toParents = -- mergeLinks toChilds toParents =
let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds -- let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
in toList $ unionWith max (fromList toParents) toChilds' -- in toList $ unionWith max (fromList toParents) toChilds'
------------------- -------------------
......
...@@ -16,8 +16,8 @@ Portability : POSIX ...@@ -16,8 +16,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.SynchronicClustering where module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo -- import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools -- import Gargantext.Viz.Phylo.PhyloTools
import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat) import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat)
......
...@@ -15,8 +15,8 @@ Portability : POSIX ...@@ -15,8 +15,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub, union) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, nub, union)
import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith, member, (!)) import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
...@@ -27,7 +27,6 @@ import Control.Lens hiding (Level) ...@@ -27,7 +27,6 @@ import Control.Lens hiding (Level)
import qualified Data.Set as Set import qualified Data.Set as Set
------------------- -------------------
-- | Proximity | -- -- | Proximity | --
------------------- -------------------
...@@ -79,6 +78,13 @@ pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of ...@@ -79,6 +78,13 @@ pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
Hamming -> undefined Hamming -> undefined
filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ _ _ -> local >= thr
Hamming -> undefined
-- | To process the proximity between a current group and a pair of targets group -- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs proximity group target target' = toProximity docs proximity group target target' =
...@@ -98,8 +104,8 @@ toProximity docs proximity group target target' = ...@@ -98,8 +104,8 @@ toProximity docs proximity group target target' =
-- | Find pairs of valuable candidates to be matched -- | Find pairs of valuable candidates to be matched
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)] makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods docs group = case null periods of makePairs candidates periods = case null periods of
True -> [] True -> []
-- | at least on of the pair candidates should be from the last added period -- | at least on of the pair candidates should be from the last added period
False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods) False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
...@@ -110,8 +116,8 @@ makePairs candidates periods docs group = case null periods of ...@@ -110,8 +116,8 @@ makePairs candidates periods docs group = case null periods of
inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds) inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> PhyloGroup -> PhyloGroup phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double-> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil proxi docs group = case pointers of phyloGroupMatching candidates fil proxi docs thr group = case pointers of
Nothing -> addPointers group fil TemporalPointer [] Nothing -> addPointers group fil TemporalPointer []
Just pts -> addPointers group fil TemporalPointer Just pts -> addPointers group fil TemporalPointer
$ head' "phyloGroupMatching" $ head' "phyloGroupMatching"
...@@ -125,8 +131,9 @@ phyloGroupMatching candidates fil proxi docs group = case pointers of ...@@ -125,8 +131,9 @@ phyloGroupMatching candidates fil proxi docs group = case pointers of
-- | for each time frame, process the proximity on relevant pairs of targeted groups -- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
pairs = makePairs (concat groups) periods docs group pairs = makePairs (concat groups) periods
in acc ++ ( concat in acc ++ ( filter (\(_,proximity) -> filterProximity proxi thr proximity)
$ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
let proximity = toProximity (filterDocs docs periods) proxi group c c' let proximity = toProximity (filterDocs docs periods) proxi group c c'
...@@ -141,51 +148,16 @@ phyloGroupMatching candidates fil proxi docs group = case pointers of ...@@ -141,51 +148,16 @@ phyloGroupMatching candidates fil proxi docs group = case pointers of
filterDocs d pds = restrictKeys d $ periodsToYears pds filterDocs d pds = restrictKeys d $ periodsToYears pds
------------------
-- | Pointers | --
------------------
-- ghostHunter :: [[PhyloGroup]] -> [[PhyloGroup]]
-- ghostHunter branches =
-- map (\branch ->
-- -- | il manque une référence au group source de chaque pointer
-- let pointers = elems $ fromList
-- $ map (\pt -> (groupIds ! (fst pt),pt))
-- $ filter (\pt -> member (fst pt) groupIds) $ concat $ map (\g -> g ^. phylo_groupGhostPointers) branch
-- in undefined
-- ) branches
-- where
-- groupIds :: Map PhyloGroupId Int
-- groupIds = fromList $ map (\g -> (getGroupId g, last' "ghostHunter" $ snd $ g ^. phylo_groupBranchId)) $ concat branches
-- --------------------------------------
-- selectBest :: [Pointers] -> [Pointers]
-- se
filterPointers :: Double -> [PhyloGroup] -> [PhyloGroup]
filterPointers thr groups =
map (\group ->
let ghosts = filter (\(_,w) -> w < thr) $ group ^. phylo_groupPeriodParents
in group & phylo_groupPeriodParents %~ (filter (\(_,w) -> w >= thr))
& phylo_groupPeriodChilds %~ (filter (\(_,w) -> w >= thr))
& phylo_groupGhostPointers %~ (++ ghosts)
) groups
----------------------------- -----------------------------
-- | Adaptative Matching | -- -- | Matching Processing | --
----------------------------- -----------------------------
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId] getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods fil max pId pIds = getNextPeriods fil max' pId pIds =
case fil of case fil of
ToChilds -> take max $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
ToParents -> take max $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]] getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]]
...@@ -202,12 +174,25 @@ getCandidates fil g pIds targets = ...@@ -202,12 +174,25 @@ getCandidates fil g pIds targets =
$ map (\g' -> (g' ^. phylo_groupPeriod,[g'])) targets $ map (\g' -> (g' ^. phylo_groupPeriod,[g'])) targets
shouldBreak :: Double -> [(Double,[PhyloGroup])] -> Bool processMatching :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup]
shouldBreak thr branches = any (\(quality,_) -> quality < thr) branches processMatching max' periods proximity thr docs groups =
map (\group ->
let childs = getCandidates ToChilds group
(getNextPeriods ToChilds max' (group ^. phylo_groupPeriod) periods) groups
parents = getCandidates ToParents group
(getNextPeriods ToParents max' (group ^. phylo_groupPeriod) periods) groups
in phyloGroupMatching parents ToParents proximity docs thr
$ phyloGroupMatching childs ToChilds proximity docs thr group
) groups
-----------------------------
-- | Adaptative Matching | --
-----------------------------
toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])] toPhyloQuality :: [[PhyloGroup]] -> Double
toBranchQuality branches = undefined toPhyloQuality _ = undefined
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
...@@ -225,64 +210,42 @@ groupsToBranches groups = ...@@ -225,64 +210,42 @@ groupsToBranches groups =
) graph ) graph
recursiveMatching :: Proximity -> Double -> Int -> [PhyloPeriodId] -> Map Date Double -> Double -> [PhyloGroup] -> [PhyloGroup]
-- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId recursiveMatching proximity thr max' periods docs quality groups =
case quality < quality' of
adaptativeMatching :: Proximity -> Double -> Double -> [PhyloGroup] -> [PhyloGroup] -- | success : we localy improve the quality of the branch, let's go deeper
adaptativeMatching proximity thr thrQua groups = True -> concat
-- | check if we should break some of the new branches or not $ map (\branch ->
case shouldBreak thrQua branches' of recursiveMatching proximity (thr + (getThresholdStep proximity)) max' periods docs quality' branch
True -> concat $ map (\(s,b) -> ) branches
if s >= thrQua -- | failure : last step was the local maximum, let's validate it
-- | we keep the branch as it is False -> groups
then b
-- | we break the branch using an increased temporal matching threshold
else let nextGroups = undefined
in adaptativeMatching proximity (thr + (getThresholdStep proximity)) thrQua nextGroups
) branches'
-- | the quality of all the new branches is sufficient
False -> concat branches
where where
-- | 3) process a quality score for each new branch -- | 3) process a quality score on the local set of branches
branches' :: [(Double,[PhyloGroup])] quality' :: Double
branches' = toBranchQuality branches quality' = toPhyloQuality branches
-- | 2) group the new groups into branches -- | 2) group the new groups into branches
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups' branches = groupsToBranches $ fromList $ map (\group -> (getGroupId group, group)) groups'
-- | 1) filter the pointers of each groups regarding the current state of the quality threshold -- | 1) process a temporal matching for each group
groups' :: [PhyloGroup] groups' :: [PhyloGroup]
groups' = filterPointers thr groups groups' = processMatching max' periods proximity thr docs groups
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
temporalMatching phylo = updatePhyloGroups 1 branches phylo temporalMatching phylo = updatePhyloGroups 1 branches phylo
where where
-- | 4) find the ghost links and postprocess the branches -- | 2) run the recursive matching to find the best repartition among branches
branches' :: Map PhyloGroupId PhyloGroup
branches' = undefined
-- | 3) run the adaptative matching to find the best repartition among branches
branches :: Map PhyloGroupId PhyloGroup branches :: Map PhyloGroupId PhyloGroup
branches = fromList branches = fromList
$ map (\g -> (getGroupId g, g)) $ map (\g -> (getGroupId g, g))
$ adaptativeMatching proximity (getThresholdInit proximity) (phyloQuality $ getConfig phylo) groups' $ recursiveMatching (phyloProximity $ getConfig phylo)
-- | 2) for each group process an initial temporal Matching (getThresholdInit $ phyloProximity $ getConfig phylo)
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs) (toPhyloQuality [groups']) groups'
-- | 1) for each group process an initial temporal Matching
groups' :: [PhyloGroup] groups' :: [PhyloGroup]
groups' = groups' = processMatching (getTimeFrame $ timeUnit $ getConfig phylo) (getPeriodIds phylo)
let maxTime = getTimeFrame $ timeUnit $ getConfig phylo (phyloProximity $ getConfig phylo) (getThresholdInit $ phyloProximity $ getConfig phylo)
periods = getPeriodIds phylo (phylo ^. phylo_timeDocs) (getGroupsFromLevel 1 phylo)
docs = phylo ^. phylo_timeDocs
--------------------------------------
in map (\group ->
let childs = getCandidates ToChilds group
(getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) groups
parents = getCandidates ToParents group
(getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) groups
in phyloGroupMatching parents ToParents proximity docs
$ phyloGroupMatching childs ToChilds proximity docs group
) groups
-- | 1) start with all the groups from a given level
groups :: [PhyloGroup]
groups = getGroupsFromLevel 1 phylo
--------------------------------------
proximity :: Proximity
proximity = phyloProximity $ getConfig phylo
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment