Commit ff80ee2f authored by qlobbe's avatar qlobbe

fix temporalMatching

parent 38d3a9cd
Pipeline #415 failed with stage
...@@ -77,6 +77,8 @@ data Phylo = ...@@ -77,6 +77,8 @@ data Phylo =
Phylo { _phylo_duration :: (Start, End) Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations , _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod] , _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -150,6 +152,7 @@ data PhyloGroup = ...@@ -150,6 +152,7 @@ data PhyloGroup =
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double , _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId , _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
......
...@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo ...@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of ...@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity -- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge]) groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc = case prox of groupsToGraph nbDocs prox gs = case prox of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (getGroupCooc y) nbDocs))
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc))) $ getCandidates gs)
-- $ getCandidates gs) Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard' sens (getGroupNgrams x) (getGroupNgrams y) cooc))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
_ -> undefined _ -> undefined
...@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
-------------------------------------- --------------------------------------
graphs :: [([GroupNode],[GroupEdge])] graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox) graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
-------------------------------------- --------------------------------------
prox :: Proximity prox :: Proximity
prox = getProximity clus prox = getProximity clus
......
...@@ -17,8 +17,10 @@ Portability : POSIX ...@@ -17,8 +17,10 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc module Gargantext.Viz.Phylo.Aggregates.Cooc
where where
import Data.List (union,concat,nub) import Data.List (union,concat,nub,sort)
import Data.Map (Map,elems,adjust,filterWithKey) import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
...@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g) ...@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
-------------------------------------- --------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- phyloCooc :: Map (Int, Int) Double -- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1 -- phyloCooc = fisToCooc phyloFis phylo1_0_1
...@@ -17,8 +17,7 @@ Portability : POSIX ...@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Document module Gargantext.Viz.Phylo.Aggregates.Document
where where
import Data.List (last) import Data.Map (Map,fromListWith)
import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple (fst) import Data.Tuple (fst)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector ...@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector
-- | To init a list of Periods framed by a starting Date and an ending Date -- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)] initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l)) initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end] $ chunkAlong g s [start .. end]
...@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds ...@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) = inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h fst $ List.partition (\d -> f' d >= start && f' d <= end) h
-------------------------------------- --------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams -- | To parse a list of Documents by filtering on a Vector of Ngrams
...@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t) ...@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots) -> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c $ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
...@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of ...@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis] filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
......
...@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList) ...@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker import Gargantext.Viz.Phylo.LevelMaker
...@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.3 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 0) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 20) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
...@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1 ...@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster] phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1 phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
---------------------------------- ----------------------------------
...@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase ...@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) defaultPhyloParam phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc defaultPhyloParam
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double
nbDocs = countDocs corpus
periods :: [(Date,Date)] periods :: [(Date,Date)]
periods = initPeriods 5 3 periods = initPeriods 5 3
......
...@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis ...@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster ...@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2") | otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
-------------------------------------- --------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup] -- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m _ = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m) $ zip [1..] l toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
-------------------------------------- --------------------------------------
...@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document ...@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup -- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> PhyloGroup clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m = clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups) PhyloGroup ((prd, lvl), idx) lbl ngrams empty
Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] (map (\g -> (getGroupId g, 1)) groups)
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
...@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m = ...@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup -- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p = cliqueToGroup prd lvl idx lbl fis p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ getSupport fis)) Nothing
(getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
where where
-------------------------------------- --------------------------------------
ngrams :: [Int] ngrams :: [Int]
...@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p = ...@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p =
-- | To transform a list of Ngrams into a PhyloGroup -- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing [] [] [] [] PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups -- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
...@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p ...@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus | otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1) $ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$ transposePeriodLinks (lvl + 1) $ transposePeriodLinks (lvl + 1)
$ setLevelLinks (lvl, lvl + 1) $ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) $ addPhyloLevel (lvl + 1)
...@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)] ...@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)]
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
where where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs (foundations ^. phylo_foundationsRoots) c) (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs c
-------------------------------------- --------------------------------------
foundations :: PhyloFoundations foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList foundations = PhyloFoundations (initFoundationsRoots roots) termList
...@@ -240,8 +250,14 @@ instance PhyloMaker [Document] ...@@ -240,8 +250,14 @@ instance PhyloMaker [Document]
phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p toPhyloBase q p c roots termList = initPhyloBase periods foundations nbDocs cooc p
where where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
-------------------------------------- --------------------------------------
foundations :: PhyloFoundations foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots roots) termList foundations = PhyloFoundations (initFoundationsRoots roots) termList
......
This diff is collapsed.
...@@ -17,65 +17,40 @@ Portability : POSIX ...@@ -17,65 +17,40 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity module Gargantext.Viz.Phylo.Metrics.Proximity
where where
import Data.List (null,intersect,union) import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size) import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,keys)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields sumInvLog :: Double -> [Double] -> Double
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard' :: Double -> [Int] -> [Int] -> Map (Int, Int) Double -> Double -- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard' s idx idx' cooc weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
| null idxUnion = 0 weightedLogJaccard sens cooc cooc' nbDocs
| idxUnion == idxInter = 1 | null union' = 0
| s == 0 = (fromIntegral $ length idxInter)/(fromIntegral $ length idxUnion) | union' == inter' = 1
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion) | sens == 0 = (fromIntegral $ length $ keys inter') / (fromIntegral $ length $ keys union')
| otherwise = (sumLog wInter)/(sumLog wUnion) | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
| otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
where where
-------------------------------------- --------------------------------------
wInter :: [Double] wInter :: Map (Int,Int) Double
wInter = elems $ getSubCooc idxInter cooc wInter = map (/nbDocs) inter'
--------------------------------------
wUnion :: [Double]
wUnion = elems $ getSubCooc idxUnion cooc
--------------------------------------
idxInter :: [Int]
idxInter = intersect idx idx'
-------------------------------------- --------------------------------------
idxUnion :: [Int] wUnion :: Map (Int,Int) Double
idxUnion = union idx idx' wUnion = map (/nbDocs) union'
-------------------------------------- --------------------------------------
sumInvLog :: [Double] -> Double inter' :: Map (Int, Int) Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l inter' = intersectionWith (+) cooc cooc'
--------------------------------------
union' :: Map (Int, Int) Double
union' = unionWith (+) cooc cooc'
-------------------------------------- --------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields -- | To process the Hamming distance between two PhyloGroup fields
......
...@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools ...@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where where
import Control.Lens hiding (both, Level, Empty) import Control.Lens hiding (both, Level, Empty)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy) import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe) import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!)) import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text,toLower,unwords) import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra import Data.Tuple.Extra
...@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams ...@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations -- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> PhyloParam -> Phylo initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> PhyloParam -> Phylo
initPhyloBase pds fds prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) prm initPhyloBase pds fds nbDocs cooc prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc prm
-- | To init the param of a Phylo -- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
...@@ -175,6 +175,11 @@ getLastLevel p = (last . sort) ...@@ -175,6 +175,11 @@ getLastLevel p = (last . sort)
. traverse . traverse
. phylo_periodLevels ) p . phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-------------------- --------------------
-- | PhyloRoots | -- -- | PhyloRoots | --
...@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of ...@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots" Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
-------------------- --------------------
-- | PhyloGroup | -- -- | PhyloGroup | --
-------------------- --------------------
...@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId ...@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup -- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId getGroupLevel = snd . fst . getGroupId
...@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup ...@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx) (((from', to'), lvl), idx)
lbl lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams) idxs
(Map.empty) (Map.empty)
Nothing Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] [] [] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
--------------------- ---------------------
...@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod ...@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat) [[d,d'] | (d,d') <- periods]
-------------------- --------------------
-- | PhyloLevel | -- -- | PhyloLevel | --
-------------------- --------------------
......
...@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v' ...@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where where
-------------------------------------- --------------------------------------
v' :: PhyloView v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min')) v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
-------------------------------------- --------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment