Commit e4e913ab authored by qlobbe's avatar qlobbe

working on temporal matching

parent 91e81646
Pipeline #545 failed with stage
......@@ -61,6 +61,7 @@ data Config =
, phyloName :: Text
, phyloLevel :: Int
, timeUnit :: Int
, timeMatching :: Int
, timePeriod :: Int
, timeStep :: Int
, fisSupport :: Int
......@@ -78,6 +79,7 @@ defaultConfig =
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, timeUnit = 1
, timeMatching = 5
, timePeriod = 3
, timeStep = 1
, fisSupport = 2
......@@ -234,7 +236,10 @@ data PhyloGroup =
type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
type Pointer = (PhyloGroupId, Weight)
data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
---------------------------
......
......@@ -18,17 +18,19 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn)
import Data.Set (size)
import Data.Map (Map, elems, fromList, unionWith, keys)
import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!))
import Data.String (String)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Debug.Trace (trace)
import Control.Lens
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.List as List
import qualified Data.Set as Set
--------------
-- | Misc | --
......@@ -36,7 +38,13 @@ import qualified Data.Vector as Vector
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
countSup s l = length $ filter (>s) l
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
Just i -> i
---------------------
......@@ -57,6 +65,11 @@ ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
-- | Time | --
--------------
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
findBounds :: [Date] -> (Date,Date)
findBounds dates =
......@@ -134,7 +147,7 @@ listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
listToEqual' :: Eq a => [a] -> [(a,a)]
listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
listToKeys :: [Int] -> [(Int,Int)]
listToKeys :: Eq a => [a] -> [(a,a)]
listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double
......@@ -143,11 +156,28 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc'
---------------
-- | Phylo | --
---------------
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
---------------
-- | Phylo | --
---------------
addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
addPointers group fil pty pointers =
case pty of
TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds %~ (++ pointers)
ToParents -> group & phylo_groupPeriodParents %~ (++ pointers)
LevelPointer -> case fil of
ToChilds -> group & phylo_groupLevelChilds %~ (++ pointers)
ToParents -> group & phylo_groupLevelParents %~ (++ pointers)
getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst
$ keys
......@@ -159,4 +189,31 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
\ No newline at end of file
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo =
over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups
. traverse
) (\group ->
let id = getGroupId group
in
if member id m
then m ! id
else group ) phylo
\ No newline at end of file
......@@ -13,4 +13,17 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.SynchronicClustering where
\ No newline at end of file
module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
--------------------
-- | Clustering | --
--------------------
relatedComponents :: [PhyloGroup] -> [[PhyloGroup]]
relatedComponents groups = undefined
\ No newline at end of file
......@@ -13,4 +13,179 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.TemporalMatching where
\ No newline at end of file
module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub)
import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.SynchronicClustering
import Control.Lens hiding (Level)
-------------------
-- | Proximity | --
-------------------
-- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
-- periodsToNbDocs prds phylo = sum $ elems
-- $ restrictKeys (phylo ^. phylo_docsByYears)
-- $ periodsToYears prds
-- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
-- matchWithPairs g1 (g2,g3) p =
-- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
-- cooc = if (g2 == g3)
-- then getGroupCooc g2
-- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
-- ngrams = if (g2 == g3)
-- then getGroupNgrams g2
-- else union (getGroupNgrams g2) (getGroupNgrams g3)
-- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
toProximity :: Map Date Double -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs group target target' =
let nbDocs = sum $ elems docs
in undefined
------------------------
-- | Local Matching | --
------------------------
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods thr docs group = case null periods of
True -> []
-- | at least on of the pair candidates should be from the last added period
False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
|| (inLastPeriod cdt' periods))
$ listToKeys
-- | remove poor candidates from previous periods
$ filter (\cdt -> (inLastPeriod cdt periods)
|| ((toProximity (reframeDocs docs periods) group group cdt) >= thr)) candidates
where
inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil thr docs group = case pointers of
Nothing -> addPointers group fil TemporalPointer []
Just pts -> addPointers group fil TemporalPointer
$ head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
pointers :: Maybe [Pointer]
pointers = find (not . null)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups ->
let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
pairs = makePairs (concat groups) periods thr docs group
in acc ++ ( filter (\(_,proximity) -> proximity >= thr )
$ concat
$ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates
let proximity = toProximity (reframeDocs docs periods) group c c'
in if (c == c')
then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
) []
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$ inits candidates
matchGroupToGroups :: [[PhyloGroup]] -> PhyloGroup -> PhyloGroup
matchGroupToGroups candidates group = undefined
-----------------------------
-- | Adaptative Matching | --
-----------------------------
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods fil max pId pIds =
case fil of
ToChilds -> take max $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
ToParents -> take max $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]]
getCandidates fil g pIds targets =
case fil of
ToChilds -> targets'
ToParents -> reverse targets'
where
targets' :: [[PhyloGroup]]
targets' = map (\groups' -> filter (\g' -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) groups') $ elems
$ filterWithKey (\k _ -> elem k pIds)
$ fromListWith (++)
$ sortOn (fst . fst)
$ map (\g' -> (g' ^. phylo_groupPeriod,[g'])) targets
shouldBreak :: Double -> [(Double,[PhyloGroup])] -> Bool
shouldBreak thr branches = any (\(quality,_) -> quality < thr) branches
toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
toBranchQuality branches = undefined
reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
reframeDocs docs periods = restrictKeys docs $ periodsToYears periods
adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup]
adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates periods =
-- | check if we should break some of the new branches or not
case shouldBreak thrQua branches' of
True -> concat $ map (\(s,b) ->
if s >= thrQua
-- | we keep the branch as it is
then b
-- | we break the branch using an increased temporal matching threshold
else let nextGroups = undefined
nextCandidates = undefined
nextPeriods = undefined
in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua
(reframeDocs docs nextPeriods)
nextGroups nextCandidates nextPeriods
) branches'
-- | the quality of all the new branches is sufficient
False -> concat branches
where
-- | 3) process a quality score for each new branch
branches' :: [(Double,[PhyloGroup])]
branches' = toBranchQuality branches
-- | 2) group the new groups into branches
branches :: [[PhyloGroup]]
branches = relatedComponents groups'
-- | 1) connect each group to its parents and childs
groups' :: [PhyloGroup]
groups' = map (\group ->
let childs = getCandidates ToChilds group
(getNextPeriods ToChilds maxTime (group ^. phylo_groupPeriod) periods) candidates
parents = getCandidates ToParents group
(getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates
-- | match the group to its possible childs then parents
in matchGroupToGroups parents $ matchGroupToGroups childs group
) groups
temporalMatching :: Phylo -> Phylo
temporalMatching phylo =
let branches = fromList $ map (\g -> (getGroupId g, g))
$ adaptativeMatching (timeMatching $ getConfig phylo) 0 0 0
(phylo ^. phylo_timeDocs)
(getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo)
in updatePhyloGroups 1 branches phylo
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment