Commit 6fc1eb02 authored by qlobbe's avatar qlobbe

phyloQuality

parent 99b5de7d
Pipeline #555 failed with stage
...@@ -17,9 +17,9 @@ Portability : POSIX ...@@ -17,9 +17,9 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloTools where module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub)
import Data.Set (Set, size) import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!)) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
import Data.String (String) import Data.String (String)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -184,6 +184,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst ...@@ -184,6 +184,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc :: Cooc -> Cooc -> Cooc sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc' sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
-------------------- --------------------
-- | PhyloGroup | -- -- | PhyloGroup | --
-------------------- --------------------
...@@ -255,11 +258,6 @@ updatePhyloGroups lvl m phylo = ...@@ -255,11 +258,6 @@ updatePhyloGroups lvl m phylo =
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link] pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
-- mergeLinks :: [Link] -> [Link] -> [Link]
-- mergeLinks toChilds toParents =
-- let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
-- in toList $ unionWith max (fromList toParents) toChilds'
------------------- -------------------
-- | Proximity | -- -- | Proximity | --
...@@ -279,3 +277,11 @@ getThresholdStep :: Proximity -> Double ...@@ -279,3 +277,11 @@ getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s WeightedLogJaccard _ _ s -> s
Hamming -> undefined Hamming -> undefined
----------------
-- | Branch | --
----------------
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
\ No newline at end of file
...@@ -16,13 +16,14 @@ Portability : POSIX ...@@ -16,13 +16,14 @@ Portability : POSIX
module Gargantext.Viz.Phylo.TemporalMatching where module Gargantext.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, nub, union) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, find, groupBy, scanl, nub, union)
import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith) import Data.Map (Map, fromList, fromListWith, filterWithKey, elems, restrictKeys, unionWith, intersectionWith, findWithDefault)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.SynchronicClustering import Gargantext.Viz.Phylo.SynchronicClustering
import Prelude (logBase)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -87,7 +88,7 @@ filterProximity proximity thr local = ...@@ -87,7 +88,7 @@ filterProximity proximity thr local =
-- | To process the proximity between a current group and a pair of targets group -- | To process the proximity between a current group and a pair of targets group
toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double toProximity :: Map Date Double -> Proximity -> PhyloGroup -> PhyloGroup -> PhyloGroup -> Double
toProximity docs proximity group target target' = toProximity docs proximity ego target target' =
let docs' = sum $ elems docs let docs' = sum $ elems docs
cooc = if target == target' cooc = if target == target'
then (target ^. phylo_groupCooc) then (target ^. phylo_groupCooc)
...@@ -95,7 +96,7 @@ toProximity docs proximity group target target' = ...@@ -95,7 +96,7 @@ toProximity docs proximity group target target' =
ngrams = if target == target' ngrams = if target == target'
then (target ^. phylo_groupNgrams) then (target ^. phylo_groupNgrams)
else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams) else union (target ^. phylo_groupNgrams) (target' ^. phylo_groupNgrams)
in pickProximity proximity docs' (group ^. phylo_groupCooc) cooc (group ^. phylo_groupNgrams) ngrams in pickProximity proximity docs' (ego ^. phylo_groupCooc) cooc (ego ^. phylo_groupNgrams) ngrams
------------------------ ------------------------
...@@ -117,9 +118,9 @@ makePairs candidates periods = case null periods of ...@@ -117,9 +118,9 @@ makePairs candidates periods = case null periods of
phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double-> PhyloGroup -> PhyloGroup phyloGroupMatching :: [[PhyloGroup]] -> Filiation -> Proximity -> Map Date Double -> Double-> PhyloGroup -> PhyloGroup
phyloGroupMatching candidates fil proxi docs thr group = case pointers of phyloGroupMatching candidates fil proxi docs thr ego = case pointers of
Nothing -> addPointers group fil TemporalPointer [] Nothing -> addPointers ego fil TemporalPointer []
Just pts -> addPointers group fil TemporalPointer Just pts -> addPointers ego fil TemporalPointer
$ head' "phyloGroupMatching" $ head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity -- | Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt') $ groupBy (\pt pt' -> snd pt == snd pt')
...@@ -136,7 +137,7 @@ phyloGroupMatching candidates fil proxi docs thr group = case pointers of ...@@ -136,7 +137,7 @@ phyloGroupMatching candidates fil proxi docs thr group = case pointers of
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates -- | process the proximity between the current group and a pair of candidates
let proximity = toProximity (filterDocs docs periods) proxi group c c' let proximity = toProximity (filterDocs docs periods) proxi ego c c'
in if (c == c') in if (c == c')
then [(getGroupId c,proximity)] then [(getGroupId c,proximity)]
else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs) else [(getGroupId c,proximity),(getGroupId c',proximity)] ) pairs)
...@@ -161,13 +162,13 @@ getNextPeriods fil max' pId pIds = ...@@ -161,13 +162,13 @@ getNextPeriods fil max' pId pIds =
getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]] getCandidates :: Filiation -> PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [[PhyloGroup]]
getCandidates fil g pIds targets = getCandidates fil ego pIds targets =
case fil of case fil of
ToChilds -> targets' ToChilds -> targets'
ToParents -> reverse targets' ToParents -> reverse targets'
where where
targets' :: [[PhyloGroup]] targets' :: [[PhyloGroup]]
targets' = map (\groups' -> filter (\g' -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) groups') $ elems targets' = map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) groups') $ elems
$ filterWithKey (\k _ -> elem k pIds) $ filterWithKey (\k _ -> elem k pIds)
$ fromListWith (++) $ fromListWith (++)
$ sortOn (fst . fst) $ sortOn (fst . fst)
...@@ -186,13 +187,44 @@ processMatching max' periods proximity thr docs groups = ...@@ -186,13 +187,44 @@ processMatching max' periods proximity thr docs groups =
) groups ) groups
----------------------------- -----------------------
-- | Adaptative Matching | -- -- | Phylo Quality | --
----------------------------- -----------------------
termFreq :: Int -> [[PhyloGroup]] -> Double
termFreq term branches = (sum $ map (\g -> findWithDefault 0 (term,term) (g ^. phylo_groupCooc)) $ concat branches)
/ (sum $ map (\g -> getTrace $ g ^. phylo_groupCooc) $ concat branches)
entropy :: [[PhyloGroup]] -> Double
entropy branches =
let terms = ngramsInBranches branches
in sum $ map (\term -> (1 / log (termFreq term branches))
/ (sum $ map (\branch -> 1 / log (termFreq term [branch])) branches)
* (sum $ map (\branch ->
let q = branchObs term (length $ concat branches) branch
in q * logBase 2 q ) branches) ) terms
where
-- | Probability to observe a branch given a random term of the phylo
branchObs :: Int -> Int -> [PhyloGroup] -> Double
branchObs term total branch = (fromIntegral $ length $ filter (\g -> elem term $ g ^. phylo_groupNgrams) branch)
/ (fromIntegral total)
homogeneity :: [[PhyloGroup]] -> Double
homogeneity branches = undefined
where
branchCov ::
toPhyloQuality :: [[PhyloGroup]] -> Double toPhyloQuality :: [[PhyloGroup]] -> Double
toPhyloQuality _ = undefined toPhyloQuality branches = sqrt (homogeneity branches / entropy branches)
-----------------------------
-- | Adaptative Matching | --
-----------------------------
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment