Commit d3097207 authored by qlobbe's avatar qlobbe

color update

parent e4e913ab
......@@ -52,45 +52,53 @@ import Control.Lens (makeLenses)
data CorpusParser = Wos | Csv deriving (Show,Generic,Eq)
data Proximity = WeightedLogJaccard {_sensibility :: Double}
| Hamming
deriving (Show,Generic,Eq)
data Config =
Config { corpusPath :: FilePath
, listPath :: FilePath
, outputPath :: FilePath
, corpusParser :: CorpusParser
, corpusLimit :: Int
, phyloName :: Text
, phyloLevel :: Int
, timeUnit :: Int
, timeMatching :: Int
, timePeriod :: Int
, timeStep :: Int
, fisSupport :: Int
, fisSize :: Int
, branchSize :: Int
Config { corpusPath :: FilePath
, listPath :: FilePath
, outputPath :: FilePath
, corpusParser :: CorpusParser
, corpusLimit :: Int
, phyloName :: Text
, phyloLevel :: Int
, phyloProximity :: Proximity
, timeUnit :: Int
, maxTimeMatch :: Int
, timePeriod :: Int
, timeStep :: Int
, fisSupport :: Int
, fisSize :: Int
, branchSize :: Int
} deriving (Show,Generic,Eq)
defaultConfig :: Config
defaultConfig =
Config { corpusPath = ""
, listPath = ""
, outputPath = ""
, corpusParser = Csv
, corpusLimit = 1000
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, timeUnit = 1
, timeMatching = 5
, timePeriod = 3
, timeStep = 1
, fisSupport = 2
, fisSize = 4
, branchSize = 3
Config { corpusPath = ""
, listPath = ""
, outputPath = ""
, corpusParser = Csv
, corpusLimit = 1000
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
, timeUnit = 1
, maxTimeMatch = 5
, timePeriod = 3
, timeStep = 1
, fisSupport = 2
, fisSize = 4
, branchSize = 3
}
instance FromJSON Config
instance ToJSON Config
instance FromJSON CorpusParser
instance ToJSON CorpusParser
instance FromJSON Proximity
instance ToJSON Proximity
-- | Software parameters
......@@ -223,6 +231,7 @@ data PhyloGroup =
, _phylo_groupIndex :: Int
, _phylo_groupSupport :: Support
, _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
......@@ -238,6 +247,8 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), filterWithKey, restrictKeys)
import Data.Set (size)
import Data.Vector (Vector)
......@@ -58,7 +58,7 @@ toPhylo docs lst conf = phylo1
--------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
......@@ -70,20 +70,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phyloFis = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [(((pId,lvl),length groups),f obj pId lvl (length groups) (getRoots phylo))] ) [] phyloFis)
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis)
else
phyloLvl )
phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup
fisToGroup fis pId lvl idx fdt =
PhyloGroup pId lvl idx
(fis ^. phyloFis_support)
(ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt)
(1,[])
[] [] [] []
Nothing
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt
in PhyloGroup pId lvl idx
(fis ^. phyloFis_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[])
[] [] [] []
Nothing
toPhylo1 :: [Document] -> Phylo -> Phylo
......@@ -160,6 +165,14 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
--------------------
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
-- | To transform the docs into a time map of coocurency matrix
docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
docsToCoocByYear docs fdt conf =
......
......@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn)
import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!))
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), toList)
import Data.String (String)
import Gargantext.Prelude
......@@ -156,9 +156,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc'
---------------
-- | Phylo | --
---------------
--------------------
-- | PhyloGroup | --
--------------------
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
......@@ -216,4 +216,17 @@ updatePhyloGroups lvl m phylo =
in
if member id m
then m ! id
else group ) phylo
\ No newline at end of file
else group ) phylo
------------------
-- | Pointers | --
------------------
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks toChilds toParents =
let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
in toList $ unionWith max (fromList toParents) toChilds'
\ No newline at end of file
......@@ -15,8 +15,8 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub)
import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys)
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, any, nub, union)
import Data.Map (Map, fromList, toList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
......@@ -30,35 +30,74 @@ import Control.Lens hiding (Level)
-- | Proximity | --
-------------------
-- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
-- periodsToNbDocs prds phylo = sum $ elems
-- $ restrictKeys (phylo ^. phylo_docsByYears)
-- $ periodsToYears prds
-- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
-- matchWithPairs g1 (g2,g3) p =
-- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
-- cooc = if (g2 == g3)
-- then getGroupCooc g2
-- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
-- ngrams = if (g2 == g3)
-- then getGroupNgrams g2
-- else union (getGroupNgrams g2) (getGroupNgrams g3)
-- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
-- | Process the inverse sumLog
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- | Process the sumLog
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To process a WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
| null ngramsInter = 0
| ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog sens coocInter) / (sumInvLog sens coocUnion)
| otherwise = (sumLog sens coocInter) / (sumLog sens coocUnion)
where
--------------------------------------
ngramsInter :: [Int]
ngramsInter = intersect ngrams ngrams'
--------------------------------------
ngramsUnion :: [Int]
ngramsUnion = union ngrams ngrams'
--------------------------------------
coocInter :: [Double]
coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
coocUnion :: [Double]
coocUnion = elems $ map (/docs) $ unionWith (+) cooc cooc'
--------------------------------------
-- | To choose a proximity function
pickProximity :: Proximity -> Double -> Cooc -> Cooc -> [Int] -> [Int] -> Double
pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard sens -> weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
Hamming -> undefined
-- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs proximity group target target' =
let docs' = sum $ elems docs
cooc = if target == target'
then (target ^. phylo_groupCooc)
else sumCooc (target ^. phylo_groupCooc) (target' ^. phylo_groupCooc)
ngrams = if target == target'
then (target ^. phylo_groupNgrams)
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams)
in pickProximity proximity docs' (group ^. phylo_groupCooc) cooc (group ^. phylo_groupNgrams) ngrams
toProximity :: Map Date Double -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs group target target' =
let nbDocs = sum $ elems docs
in undefined
------------------------
-- | Local Matching | --
------------------------
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods thr docs group = case null periods of
-- | Find pairs of valuable candidates to be matched
makePairs :: [PhyloGroup] -> [PhyloPeriodId] -> Double -> Map Date Double -> Proximity -> PhyloGroup -> [(PhyloGroup,PhyloGroup)]
makePairs candidates periods thr docs proximity group = case null periods of
True -> []
-- | at least on of the pair candidates should be from the last added period
False -> filter (\(cdt,cdt') -> (inLastPeriod cdt periods)
......@@ -66,14 +105,14 @@ makePairs candidates periods thr docs group = case null periods of
$ listToKeys
-- | remove poor candidates from previous periods
$ filter (\cdt -> (inLastPeriod cdt periods)
|| ((toProximity (reframeDocs docs periods) group group cdt) >= thr)) candidates
|| ((toProximity (reframeDocs docs periods) proximity group group cdt) >= thr)) candidates
where
inLastPeriod :: PhyloGroup -> [PhyloPeriodId] -> Bool
inLastPeriod g prds = (g ^. phylo_groupPeriod) == (last' "makePairs" prds)
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil thr docs group = case pointers of
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Double -> Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil thr docs proxi group = case pointers of
Nothing -> addPointers group fil TemporalPointer []
Just pts -> addPointers group fil TemporalPointer
$ head' "phyloGroupMatching"
......@@ -87,12 +126,12 @@ phyloGroupMatching candidates fil thr docs group = case pointers of
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc groups ->
let periods = nub $ map (\g' -> g' ^. phylo_groupPeriod) $ concat groups
pairs = makePairs (concat groups) periods thr docs group
pairs = makePairs (concat groups) periods thr docs proxi group
in acc ++ ( filter (\(_,proximity) -> proximity >= thr )
$ concat
$ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates
let proximity = toProximity (reframeDocs docs periods) group c c'
let proximity = toProximity (reframeDocs docs periods) proxi group c c'
in if (c == c')
then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
......@@ -101,12 +140,6 @@ phyloGroupMatching candidates fil thr docs group = case pointers of
$ inits candidates
matchGroupToGroups :: [[PhyloGroup]] -> PhyloGroup -> PhyloGroup
matchGroupToGroups candidates group = undefined
-----------------------------
-- | Adaptative Matching | --
-----------------------------
......@@ -145,8 +178,10 @@ reframeDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
reframeDocs docs periods = restrictKeys docs $ periodsToYears periods
adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup]
adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates periods =
-- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId
adaptativeMatching :: Int -> Double -> Double -> Double -> Map Date Double -> Proximity -> [PhyloGroup] -> [PhyloGroup] -> [PhyloPeriodId] -> [PhyloGroup]
adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candidates periods =
-- | check if we should break some of the new branches or not
case shouldBreak thrQua branches' of
True -> concat $ map (\(s,b) ->
......@@ -159,6 +194,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates period
nextPeriods = undefined
in adaptativeMatching maxTime thrStep (thrMatch + thrStep) thrQua
(reframeDocs docs nextPeriods)
proximity
nextGroups nextCandidates nextPeriods
) branches'
-- | the quality of all the new branches is sufficient
......@@ -178,14 +214,16 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs groups candidates period
parents = getCandidates ToParents group
(getNextPeriods ToParents maxTime (group ^. phylo_groupPeriod) periods) candidates
-- | match the group to its possible childs then parents
in matchGroupToGroups parents $ matchGroupToGroups childs group
in phyloGroupMatching parents ToParents thrMatch docs proximity
$ phyloGroupMatching childs ToChilds thrMatch docs proximity group
) groups
temporalMatching :: Phylo -> Phylo
temporalMatching phylo =
let branches = fromList $ map (\g -> (getGroupId g, g))
$ adaptativeMatching (timeMatching $ getConfig phylo) 0 0 0
(phylo ^. phylo_timeDocs)
$ adaptativeMatching (maxTimeMatch $ getConfig phylo) 0 0 0
(phylo ^. phylo_timeDocs)
(phyloProximity $ getConfig phylo)
(getGroupsFromLevel 1 phylo) (getGroupsFromLevel 1 phylo) (getPeriodIds phylo)
in updatePhyloGroups 1 branches phylo
\ No newline at end of file
......@@ -137,9 +137,9 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea
colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d
| d == 0 = H.BGColor (toColor PaleGreen)
| d == 1 = H.BGColor (toColor SkyBlue)
| d == 2 = H.BGColor (toColor LightPink)
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment