Commit d3097207 authored by qlobbe's avatar qlobbe

color update

parent e4e913ab
Pipeline #547 failed with stage
...@@ -52,6 +52,10 @@ import Control.Lens (makeLenses) ...@@ -52,6 +52,10 @@ import Control.Lens (makeLenses)
data CorpusParser = Wos | Csv deriving (Show,Generic,Eq) data CorpusParser = Wos | Csv deriving (Show,Generic,Eq)
data Proximity = WeightedLogJaccard {_sensibility :: Double}
| Hamming
deriving (Show,Generic,Eq)
data Config = data Config =
Config { corpusPath :: FilePath Config { corpusPath :: FilePath
, listPath :: FilePath , listPath :: FilePath
...@@ -60,8 +64,9 @@ data Config = ...@@ -60,8 +64,9 @@ data Config =
, corpusLimit :: Int , corpusLimit :: Int
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloProximity :: Proximity
, timeUnit :: Int , timeUnit :: Int
, timeMatching :: Int , maxTimeMatch :: Int
, timePeriod :: Int , timePeriod :: Int
, timeStep :: Int , timeStep :: Int
, fisSupport :: Int , fisSupport :: Int
...@@ -78,8 +83,9 @@ defaultConfig = ...@@ -78,8 +83,9 @@ defaultConfig =
, corpusLimit = 1000 , corpusLimit = 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
, timeUnit = 1 , timeUnit = 1
, timeMatching = 5 , maxTimeMatch = 5
, timePeriod = 3 , timePeriod = 3
, timeStep = 1 , timeStep = 1
, fisSupport = 2 , fisSupport = 2
...@@ -91,6 +97,8 @@ instance FromJSON Config ...@@ -91,6 +97,8 @@ instance FromJSON Config
instance ToJSON Config instance ToJSON Config
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity
instance ToJSON Proximity
-- | Software parameters -- | Software parameters
...@@ -223,6 +231,7 @@ data PhyloGroup = ...@@ -223,6 +231,7 @@ data PhyloGroup =
, _phylo_groupIndex :: Int , _phylo_groupIndex :: Int
, _phylo_groupSupport :: Support , _phylo_groupSupport :: Support
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId , _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupLevelParents :: [Pointer] , _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer] , _phylo_groupLevelChilds :: [Pointer]
...@@ -238,6 +247,8 @@ type Weight = Double ...@@ -238,6 +247,8 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup -- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight) type Pointer = (PhyloGroupId, Weight)
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
data Filiation = ToParents | ToChilds deriving (Generic, Show) data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
......
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++)) import Data.List (concat, nub, partition, sort, (++))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!)) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), filterWithKey, restrictKeys)
import Data.Set (size) import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -58,7 +58,7 @@ toPhylo docs lst conf = phylo1 ...@@ -58,7 +58,7 @@ toPhylo docs lst conf = phylo1
-------------------- --------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -70,17 +70,22 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -70,17 +70,22 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phyloFis = m ! pId phyloFis = m ! pId
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [(((pId,lvl),length groups),f obj pId lvl (length groups) (getRoots phylo))] ) [] phyloFis) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis)
else else
phyloLvl ) phyloLvl )
phylo phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt = fisToGroup fis pId lvl idx fdt coocs =
PhyloGroup pId lvl idx let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt
in PhyloGroup pId lvl idx
(fis ^. phyloFis_support) (fis ^. phyloFis_support)
(ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt) ngrams
(ngramsToCooc ngrams coocs)
(1,[]) (1,[])
[] [] [] [] [] [] [] []
Nothing Nothing
...@@ -160,6 +165,14 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis" ...@@ -160,6 +165,14 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
-------------------- --------------------
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
-- | To transform the docs into a time map of coocurency matrix -- | To transform the docs into a time map of coocurency matrix
docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
docsToCoocByYear docs fdt conf = docsToCoocByYear docs fdt conf =
......
...@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where ...@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn) import Data.List (sort, concat, null, union, (++), tails, sortOn)
import Data.Set (Set, size) import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!)) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), toList)
import Data.String (String) import Data.String (String)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -156,9 +156,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst ...@@ -156,9 +156,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc :: Cooc -> Cooc -> Cooc sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc' sumCooc cooc cooc' = unionWith (+) cooc cooc'
--------------- --------------------
-- | Phylo | -- -- | PhyloGroup | --
--------------- --------------------
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex) getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
...@@ -217,3 +217,16 @@ updatePhyloGroups lvl m phylo = ...@@ -217,3 +217,16 @@ updatePhyloGroups lvl m phylo =
if member id m if member id m
then m ! id then m ! id
else group ) phylo else group ) phylo
------------------
-- | Pointers | --
------------------
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks toChilds toParents =
let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
in toList $ unionWith max (fromList toParents) toChilds'
\ No newline at end of file
...@@ -15,8 +15,8 @@ Portability : POSIX ...@@ -15,8 +15,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub, union)
import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys) import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
...@@ -30,35 +30,74 @@ import Control.Lens hiding (Level) ...@@ -30,35 +30,74 @@ import Control.Lens hiding (Level)
-- | Proximity | -- -- | Proximity | --
------------------- -------------------
-- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
-- periodsToNbDocs prds phylo = sum $ elems
-- $ restrictKeys (phylo ^. phylo_docsByYears)
-- $ periodsToYears prds
-- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double -- | Process the inverse sumLog
-- matchWithPairs g1 (g2,g3) p = sumInvLog :: Double -> [Double] -> Double
-- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- cooc = if (g2 == g3)
-- then getGroupCooc g2
-- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
-- ngrams = if (g2 == g3)
-- then getGroupNgrams g2
-- else union (getGroupNgrams g2) (getGroupNgrams g3)
-- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
toProximity :: Map Date Double -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double -- | Process the sumLog
toProximity docs group target target' = sumLog :: Double -> [Double] -> Double
let nbDocs = sum $ elems docs sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
in undefined
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To process a WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
| null ngramsInter = 0
| ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog sens coocInter) / (sumInvLog sens coocUnion)
| otherwise = (sumLog sens coocInter) / (sumLog sens coocUnion)
where
--------------------------------------
ngramsInter :: [Int]
ngramsInter = intersect ngrams ngrams'
--------------------------------------
ngramsUnion :: [Int]
ngramsUnion = union ngrams ngrams'
--------------------------------------
coocInter :: [Double]
coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
coocUnion :: [Double]
coocUnion = elems $ map (/docs) $ unionWith (+) cooc cooc'
--------------------------------------
-- | To choose a proximity function
pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard sens -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
Hamming -> undefined
-- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs proximity group target target' =
let docs' = sum $ elems docs
cooc = if target == target'
then (target ^. phylo_groupCooc)
else sumCooc (target ^. phylo_groupCooc) (target' ^. phylo_groupCooc)
ngrams = if target == target'
then (target ^. phylo_groupNgrams)
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams)
in pickProximity proximity docs' (group ^. phylo_groupCooc) cooc (group ^. phylo_groupNgrams) ngrams
------------------------ ------------------------
-- | Local Matching | -- -- | Local Matching | --
------------------------ ------------------------
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)] -- | Find pairs of valuable candidates to be matched
makePairs candidates periods thr docs group = case null periods of makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> Proximity -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods thr docs proximity group = case null periods of
True -> [] True -> []
-- | at least on of the pair candidates should be from the last added period -- | at least on of the pair candidates should be from the last added period
False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods) False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
...@@ -66,14 +105,14 @@ makePairs candidates periods thr docs group = case null periods of ...@@ -66,14 +105,14 @@ makePairs candidates periods thr docs group = case null periods of
$ listToKeys $ listToKeys
-- | remove poor candidates from previous periods -- | remove poor candidates from previous periods
$ filter (\cdt -> (inLastPeriod cdt periods) $ filter (\cdt -> (inLastPeriod cdt periods)
|| ((toProximity (reframeDocs docs periods) group group cdt) >= thr)) candidates || ((toProximity (reframeDocs docs periods) proximity group group cdt) >= thr)) candidates
where where
inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds) inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> PhyloGroup -> PhyloGroup phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil thr docs group = case pointers of phyloGroupMatching candidates fil thr docs proxi group = case pointers of
Nothing -> addPointers group fil TemporalPointer [] Nothing -> addPointers group fil TemporalPointer []
Just pts -> addPointers group fil TemporalPointer Just pts -> addPointers group fil TemporalPointer
$ head' "phyloGroupMatching" $ head' "phyloGroupMatching"
...@@ -87,12 +126,12 @@ phyloGroupMatching candidates fil thr docs group = case pointers of ...@@ -87,12 +126,12 @@ phyloGroupMatching candidates fil thr docs group = case pointers of
-- | for each time frame, process the proximity on relevant pairs of targeted groups -- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
pairs = makePairs (concat groups) periods thr docs group pairs = makePairs (concat groups) periods thr docs proxi group
in acc ++ ( filter (\(_,proximity) -> proximity >= thr ) in acc ++ ( filter (\(_,proximity) -> proximity >= thr )
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
let proximity = toProximity (reframeDocs docs periods) group c c' let proximity = toProximity (reframeDocs docs periods) proxi group c c'
in if (c == c') in if (c == c')
then [(getGroupId c,proximity)] then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs) else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
...@@ -101,12 +140,6 @@ phyloGroupMatching candidates fil thr docs group = case pointers of ...@@ -101,12 +140,6 @@ phyloGroupMatching candidates fil thr docs group = case pointers of
$ inits candidates $ inits candidates
matchGroupToGroups :: [[PhyloGroup]] -> PhyloGroup -> PhyloGroup
matchGroupToGroups candidates group = undefined
----------------------------- -----------------------------
-- | Adaptative Matching | -- -- | Adaptative Matching | --
----------------------------- -----------------------------
...@@ -145,8 +178,10 @@ reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double ...@@ -145,8 +178,10 @@ reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
reframeDocs docs periods = restrictKeys docs $ periodsToYears periods reframeDocs docs periods = restrictKeys docs $ periodsToYears periods
adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup] -- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId
adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates periods =
adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup]
adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candidates periods =
-- | check if we should break some of the new branches or not -- | check if we should break some of the new branches or not
case shouldBreak thrQua branches' of case shouldBreak thrQua branches' of
True -> concat $ map (\(s,b) -> True -> concat $ map (\(s,b) ->
...@@ -159,6 +194,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates period ...@@ -159,6 +194,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates period
nextPeriods = undefined nextPeriods = undefined
in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua
(reframeDocs docs nextPeriods) (reframeDocs docs nextPeriods)
proximity
nextGroups nextCandidates nextPeriods nextGroups nextCandidates nextPeriods
) branches' ) branches'
-- | the quality of all the new branches is sufficient -- | the quality of all the new branches is sufficient
...@@ -178,14 +214,16 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates period ...@@ -178,14 +214,16 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates period
parents = getCandidates ToParents group parents = getCandidates ToParents group
(getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates (getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates
-- | match the group to its possible childs then parents -- | match the group to its possible childs then parents
in matchGroupToGroups parents $ matchGroupToGroups childs group in phyloGroupMatching parents ToParents thrMatch docs proximity
$ phyloGroupMatching childs ToChilds thrMatch docs proximity group
) groups ) groups
temporalMatching :: Phylo -> Phylo temporalMatching :: Phylo -> Phylo
temporalMatching phylo = temporalMatching phylo =
let branches = fromList $ map (\g -> (getGroupId g, g)) let branches = fromList $ map (\g -> (getGroupId g, g))
$ adaptativeMatching (timeMatching $ getConfig phylo) 0 0 0 $ adaptativeMatching (maxTimeMatch $ getConfig phylo) 0 0 0
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phyloProximity $ getConfig phylo)
(getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo) (getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo)
in updatePhyloGroups 1 branches phylo in updatePhyloGroups 1 branches phylo
\ No newline at end of file
...@@ -137,9 +137,9 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea ...@@ -137,9 +137,9 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea
colorFromDynamics :: Double -> H.Attribute colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d colorFromDynamics d
| d == 0 = H.BGColor (toColor PaleGreen) | d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor SkyBlue) | d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor LightPink) | d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black) | otherwise = H.Color (toColor Black)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment