Commit 4cbd0eb4 authored by Quentin Lobbé's avatar Quentin Lobbé

hard core refactoring

parent db51d0bc
......@@ -183,10 +183,6 @@ data Document = Document
type Cluster = [PhyloGroup]
class AppendToPhylo a where
addPhyloLevel :: Level -> Map (Date,Date) [a] -> Phylo -> Phylo
initPhyloGroup :: a -> PhyloGroup
-- | A List of PhyloGroup in a PhyloGraph
type PhyloNodes = [PhyloGroup]
-- | A List of weighted links between some PhyloGroups in a PhyloGraph
......
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Aggregates.Cluster
where
import Data.List (last,head,union,concat,null,nub,(++),init,tail,(!!))
import Data.Map (Map,elems,adjust,unionWith,intersectionWith)
import Data.Set (Set)
import Data.Tuple (fst, snd)
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
-- | To apply a Clustering method to a PhyloGraph
graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
graphToClusters (clust,param) (nodes,edges) = case clust of
Louvain -> undefined
RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
-- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
$ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters lvl prd p) p
in if null (fst graph)
then []
else graphToClusters (clus,param') graph)
(getPhyloPeriods p))
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Aggregates.Cooc
where
import Data.List (last,head,union,concat)
import Data.Map (Map, elems, adjust)
import Data.Set (Set)
import Data.Tuple (fst, snd)
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
-- | To transform the Fis into a coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [Fis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
$ concat
$ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x)
$ (concat . elems) m
where
--------------------------------------
fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] $ (concat . elems) m
--------------------------------------
docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 $ (concat . elems) m
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
--------------------------------------
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Aggregates.Document
where
import Data.List (last,head)
import Data.Map (Map)
import Data.Text (Text, unwords, toLower, words)
import Data.Tuple (fst, snd)
import Data.Tuple.Extra
import Data.Vector (Vector)
import Gargantext.Prelude hiding (head)
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Vector as Vector
-- | To init a set of periods out of a given Grain and Step
docsToPeriods :: (Ord date, Enum date) => (doc -> date)
-> Grain -> Step -> [doc] -> Map (date, date) [doc]
docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
where
--------------------------------------
hs = steps g s $ both f (head es, last es)
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
steps s' o' (start,end) = map (\l -> (head l, last l))
$ chunkAlong s' o' [start .. end]
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: PhyloNgrams -> [Document] -> [Document]
parseDocs l docs = map (\(Document d t)
-> Document d ( unwords
$ filter (\x -> Vector.elem x l)
$ monoTexts t)) docs
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: Grain -> Step -> [Document] -> PhyloNgrams -> Map (Date, Date) [Document]
groupDocsByPeriod g s docs ngrams = docsToPeriods date g s $ parseDocs ngrams docs
-- | To transform a corpus of texts into a structured list of Documents
corpusToDocs :: [(Date, Text)] -> [Document]
corpusToDocs l = map (\(d,t) -> Document d t) l
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Aggregates.Fis
where
import Data.List (last,head)
import Data.Map (Map)
import Data.Text (Text, unwords, toLower, words)
import Data.Tuple (fst, snd)
import Data.Tuple.Extra
import Data.Vector (Vector)
import Gargantext.Prelude hiding (head)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Vector as Vector
-- | To Filter Fis by support
filterFisBySupport :: Bool -> Int -> Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
filterFisBySupport empty min m = case empty of
True -> Map.map (\l -> filterMinorFis min l) m
False -> Map.map (\l -> keepFilled (filterMinorFis) min l) m
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
filterMinorFis :: Int -> [Fis] -> [Fis]
filterMinorFis min l = filter (\fis -> snd fis > min) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) []
in filter (\fis -> elem (fst fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [Fis]
docsToFis docs = map (\d -> Map.toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (words . text) d)) docs
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.BranchMaker
where
import Data.List (last,head,union,concat,null,nub,(++),init,tail,(!!))
import Data.Map (Map,elems,adjust,unionWith,intersectionWith)
import Data.Set (Set)
import Data.Tuple (fst, snd)
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> PhyloGraph -> Phylo -> [PhyloBranch]
graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c))
$ zip [0..]
$ relatedComp 0 (head nodes) (tail nodes,edges) [] []
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
groupsToGraph (prox,param) groups p = (groups,edges)
where
edges :: PhyloEdges
edges = case prox of
FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
(param !! 0) (getGroupCooc x)
(unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
_ -> undefined
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterPhyloBranches (\l -> l ++ (graphToBranches lvl (groupsToGraph (FromPairs,[]) (getGroupsWithLevel lvl p) p) p) ) p
\ No newline at end of file
......@@ -40,12 +40,22 @@ import Data.Tuple (fst, snd)
import Data.Tuple.Extra
import Data.Vector (Vector, fromList, elemIndex)
import Gargantext.Prelude hiding (head)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo
import Gargantext.Prelude hiding (head)
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Tools
import qualified Data.Bool as Bool
import qualified Data.List as List
import qualified Data.Map as Map
......@@ -56,425 +66,92 @@ import qualified Data.Vector as Vector
------------------------------------------------------------------------
-- | STEP 14 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
-- | STEP 11 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
------------------------------------------------------------------------
-- | STEP 13 | -- Cluster the Fis
-- | STEP 10 | -- Cluster the Fis
-- | To do : ajouter de nouveaux clusters / proxi
-- gérer les cooc à level 2 et +, idem pour les quality
-- réfléchir aux formats de sortie
phylo2 :: Phylo
phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
-- | To apply a Clustering method to a PhyloGraph
graphToClusters :: (Clustering,[Double]) -> PhyloGraph -> [[PhyloGroup]]
graphToClusters (clust,param) (nodes,edges) = case clust of
Louvain -> undefined
RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
phyloCluster :: Map (Date,Date) [Cluster]
phyloCluster = phyloToClusters 1 (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloBranch1
-- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters :: Level -> (Proximity,[Double]) -> (Clustering,[Double]) -> Phylo -> Map (Date,Date) [[PhyloGroup]]
phyloToClusters lvl (prox,param) (clus,param') p = Map.fromList
$ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph (prox,param) (getGroupsWithFilters lvl prd p) p
in if null (fst graph)
then []
else graphToClusters (clus,param') graph)
(getPhyloPeriods p))
------------------------------------------------------------------------
-- | STEP 9 | -- Find the Branches
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [PhyloGroup] -> PhyloGroup
clusterToGroup prd lvl idx lbl groups =
PhyloGroup ((prd, lvl), idx) lbl ((sort . nub . concat) $ map getGroupNgrams groups) empty empty [] [] [] (map (\g -> (getGroupId g, 1)) groups)
phyloBranch1 :: Phylo
phyloBranch1 = setPhyloBranches 1 phylo1_c
-- | To transform a list of Clusters into a new Phylolevel
clustersToPhyloLevel :: Level -> Map (Date,Date) [[PhyloGroup]] -> Phylo -> Phylo
clustersToPhyloLevel lvl m p = over (phylo_periods . traverse)
(\period ->
let periodId = _phylo_periodId period
clusters = zip [1..] (m ! periodId)
in over (phylo_periodLevels)
(\levels ->
let groups = map (\cluster -> clusterToGroup periodId lvl (fst cluster) "" (snd cluster)) clusters
in levels ++ [PhyloLevel (periodId, lvl) groups]
) period) p
------------------------------------------------------------------------
-- | STEP 8 | -- Link the PhyloGroups of level 1 through the Periods
phyloWithGroups2 = clustersToPhyloLevel
2
(phyloToClusters 1 (WeightedLogJaccard,[0]) (RelatedComponents, []) phyloWithBranches_1) phyloWithBranches_1
------------------------------------------------------------------------
-- | STEP 12 | -- Find the Branches
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp idx curr (nodes,edges) next memo
| null nodes' && null next' = memo'
| (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
| otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
where
--------------------------------------
memo' :: [[PhyloGroup]]
memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
--------------------------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> PhyloGraph -> Phylo -> [PhyloBranch]
graphToBranches lvl (nodes,edges) p = map (\(idx,c) -> PhyloBranch (lvl,idx) "" (map getGroupId c))
$ zip [0..]
$ relatedComp 0 (head nodes) (tail nodes,edges) [] []
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph :: (Proximity,[Double]) -> [PhyloGroup] -> Phylo -> PhyloGraph
groupsToGraph (prox,param) groups p = (groups,edges)
where
edges :: PhyloEdges
edges = case prox of
FromPairs -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
WeightedLogJaccard -> map (\(x,y) -> ((x,y), weightedLogJaccard
(param !! 0) (getGroupCooc x)
(unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
_ -> undefined
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterPhyloBranches (\l -> l ++ (graphToBranches lvl (groupsToGraph (FromPairs,[]) (getGroupsWithLevel lvl p) p) p) ) p
phyloWithBranches_1 = setPhyloBranches 1 phyloWithPair_1_Childs
phylo1_c :: Phylo
phylo1_c = pairGroupsToGroups Childs 1 0.01 (WeightedLogJaccard,[0]) phylo1_p
------------------------------------------------------------------------
-- | STEP 11 | -- Link the PhyloGroups of level 1 through the Periods
-- | To process the weightedLogJaccard between two PhyloGroups fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To apply the corresponding proximity function based on a given Proximity
getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
getProximity (prox,param) g1 g2 = case prox of
WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
_ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to id l = case to of
Childs -> unNested id ((tail . snd) next)
Parents -> unNested id ((reverse . fst) next)
_ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
where
--------------------------------------
next :: ([PhyloPeriodId], [PhyloPeriodId])
next = splitAt idx l
--------------------------------------
idx :: Int
idx = case (List.elemIndex id l) of
Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
Just i -> i
--------------------------------------
-- | To have an non-overlapping next period
unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
unNested x l
| null l = []
| nested (fst $ head l) x = unNested x (tail l)
| nested (snd $ head l) x = unNested x (tail l)
| otherwise = l
--------------------------------------
nested :: Date -> PhyloPeriodId -> Bool
nested d prd = d >= fst prd && d <= snd prd
--------------------------------------
-- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates to depth max thr (prox,param) group p
| depth > max || null next = []
| (not . null) best = take 2 best
| otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
where
--------------------------------------
next :: [PhyloPeriodId]
next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
--------------------------------------
candidates :: [PhyloGroup]
candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> getProximity (prox,param) group group') candidates
--------------------------------------
best :: [(PhyloGroupId, Double)]
best = reverse
$ sortOn snd
$ filter (\(id,score) -> score >= thr) scores
--------------------------------------
-- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
makePair to group ids = case to of
Childs -> over (phylo_groupPeriodChilds) addPointers group
Parents -> over (phylo_groupPeriodParents) addPointers group
_ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
where
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers l = nub $ (l ++ ids)
--------------------------------------
-- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
(\groups ->
map (\group ->
if (getGroupLevel group) == lvl
then
let
--------------------------------------
candidates :: [(PhyloGroupId, Double)]
candidates = findBestCandidates to 1 5 thr (prox,param) group p
--------------------------------------
in
makePair to group candidates
else
group ) groups) p
phyloWithPair_1_Childs :: Phylo
phyloWithPair_1_Childs = pairGroupsToGroups Childs 1 0.01 (WeightedLogJaccard,[0]) phyloWithPair_1_Parents
phyloWithPair_1_Parents :: Phylo
phyloWithPair_1_Parents = pairGroupsToGroups Parents 1 0.01 (WeightedLogJaccard,[0]) phyloLinked_0_1
phylo1_p :: Phylo
phylo1_p = pairGroupsToGroups Parents 1 0.01 (WeightedLogJaccard,[0]) phylo1_0_1
------------------------------------------------------------------------
-- | STEP 10 | -- Build the coocurency Matrix of the Phylo
-- | To transform the Fis into a coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [Fis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
$ concat
$ map (\x -> listToUnDirectedCombiWith (\x -> ngramsToIdx x p) $ (Set.toList . fst) x)
$ (concat . elems) m
where
--------------------------------------
fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] $ (concat . elems) m
--------------------------------------
docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 $ (concat . elems) m
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToUnDirectedCombiWith (\x -> ngramsToIdx x p) fisNgrams)
--------------------------------------
-- | STEP 7 | -- Build the coocurency Matrix of the Phylo
phyloCooc :: Map (Int, Int) Double
phyloCooc = fisToCooc phyloFisFiltered phyloLinked_0_1
phyloCooc = fisToCooc phyloFis phylo1_0_1
------------------------------------------------------------------------
-- | STEP 9 | -- Build level 1 of the Phylo
-- | To Cliques into Groups
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Ngrams -> (Clique,Support) -> Map (Date, Date) [Fis] -> Phylo -> PhyloGroup
cliqueToGroup period lvl idx label fis m p =
PhyloGroup ((period, lvl), idx) label ngrams (singleton "support" (fromIntegral $ snd fis)) cooc [] [] [] []
where
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> ngramsToIdx x p)
$ Set.toList
$ fst fis
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
$ fisToCooc (restrictKeys m $ Set.fromList [period]) p
--------------------------------------
-- | STEP 6 | -- Build the level 1 of the Phylo
-- | To transform Fis into PhyloLevels
fisToPhyloLevel :: Map (Date, Date) [Fis] -> Phylo -> Phylo
fisToPhyloLevel m p = over (phylo_periods . traverse)
(\period ->
let periodId = _phylo_periodId period
fisList = zip [1..] (m ! periodId)
in over (phylo_periodLevels)
(\phyloLevels ->
let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis) m p) fisList
in phyloLevels ++ [PhyloLevel (periodId, 1) groups]
) period ) p
phylo1_0_1 :: Phylo
phylo1_0_1 = setLevelLinks (0,1) phylo1_1_0
-- | to do : ajouter ce truc à addPhylolevel puis le rendre polymorphique (Fis/Document -> Group)
phylo1_1_0 :: Phylo
phylo1_1_0 = setLevelLinks (1,0) phylo1
-- aggregateToPhyloLevel' :: (a -> PhyloGroup) -> Map (Date, Date) [a] -> Phylo -> Phylo
-- aggregateToPhyloLevel' f m p = alterPhyloPeriods (\period ->
-- let periodId = _phylo_periodId period
-- aggList = zip [1..] (m ! periodId)
-- in over (phylo_periodLevels)
-- (\phyloLevels ->
-- let groups = map f aggList
-- ) period) p
phyloLinked_0_1 :: Phylo
phyloLinked_0_1 = alterLevelLinks (0,1) phyloLinked_1_0
phyloLinked_1_0 :: Phylo
phyloLinked_1_0 = alterLevelLinks (1,0) phyloWithGroups1
phylo1 :: Phylo
phylo1 = addPhyloLevel (1) phyloFis phylo0_m1_0
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | STEP 8 | -- Create Frequent Items Sets by Period and filter them
-- | To Filter Fis by support
filterFisBySupport :: Bool -> Int -> Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
filterFisBySupport empty min m = case empty of
True -> Map.map (\l -> filterMinorFis min l) m
False -> Map.map (\l -> keepFilled (filterMinorFis) min l) m
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
filterMinorFis :: Int -> [Fis] -> [Fis]
filterMinorFis min l = filter (\fis -> snd fis > min) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) []
in filter (\fis -> elem (fst fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [Fis]
docsToFis docs = map (\d -> Map.toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (words . text) d)) docs
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
phyloFis :: Map (Date, Date) [Fis]
phyloFis = filterFisBySupport False 1 (filterFisByNested (docsToFis phyloDocs))
------------------------------------------------------------------------
-- | STEP 4 | -- Link level 0 to level -1 and reverse
phylo0_m1_0 :: Phylo
phylo0_m1_0 = setLevelLinks ((-1),0) phylo0_0_m1
------------------------------------------------------------------------
-- | STEP 6 | -- Link level 0 to level -1
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups (lvl,lvl') current targets
| lvl < lvl' = setLevelParents current
| lvl > lvl' = setLevelChilds current
| otherwise = current
where
--------------------------------------
setLevelChilds :: PhyloGroup -> PhyloGroup
setLevelChilds = over (phylo_groupLevelChilds) addPointers
--------------------------------------
setLevelParents :: PhyloGroup -> PhyloGroup
setLevelParents = over (phylo_groupLevelParents) addPointers
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target ->
if shouldLink (lvl,lvl')
(_phylo_groupNgrams current)
(_phylo_groupNgrams target )
then Just ((getGroupId target),1)
else Nothing) targets
--------------------------------------
-- | To set the LevelLinks between two lists of PhyloGroups
linkGroupsByLevel :: (Level,Level) -> Phylo -> [PhyloGroup] -> [PhyloGroup]
linkGroupsByLevel (lvl,lvl') p groups = map (\group ->
if getGroupLevel group == lvl
then linkGroupToGroups (lvl,lvl') group (getGroupsWithFilters lvl' (getGroupPeriod group) p)
else group) groups
-- | To set the LevelLink of all the PhyloGroups of a Phylo
alterLevelLinks :: (Level,Level) -> Phylo -> Phylo
alterLevelLinks (lvl,lvl') p = alterPhyloGroups (linkGroupsByLevel (lvl,lvl') p) p
phylo0_0_m1 :: Phylo
phylo0_0_m1 = setLevelLinks (0,(-1)) phylo0
------------------------------------------------------------------------
-- | STEP 3 | -- Build level 0 as a copy of level -1
-- | To do : build a real level 0 !
-- | To clone the last PhyloLevel of each PhyloPeriod and update it with a new LevelValue
......@@ -482,132 +159,25 @@ clonePhyloLevel :: Level -> Phylo -> Phylo
clonePhyloLevel lvl p = alterPhyloLevels (\l -> l ++ [setPhyloLevelId lvl $ head l]) p
phyloWithGroups0 :: Phylo
phyloWithGroups0 = updatePhyloByLevel 0 phyloWithGroupsm1
------------------------------------------------------------------------
-- | STEP 4 | -- Build level -1
-- | To transform a list of Documents into a PhyloLevel
docsToPhyloLevel :: Level -> (Date, Date) -> [Document] -> Phylo -> PhyloLevel
docsToPhyloLevel lvl (d, d') docs p = initPhyloLevel
((d, d'), lvl)
(map (\(f,s) -> initGroup [s] s f lvl d d' p)
$ zip [1..]
$ (nub . concat)
$ map (words . text) docs)
-- | To transform a Map of Periods and Documents into a list of PhyloPeriods
docsToPhyloPeriods :: Level -> Map (Date,Date) [Document] -> Phylo -> [PhyloPeriod]
docsToPhyloPeriods lvl docs p = map (\(id,l) -> initPhyloPeriod id l)
$ Map.toList
$ mapWithKey (\k v -> [docsToPhyloLevel lvl k v p]) docs
-- | To update a Phylo for a given Levels
updatePhyloByLevel :: Level -> Phylo -> Phylo
updatePhyloByLevel lvl p
| lvl < 0 = appendToPhyloPeriods (docsToPhyloPeriods lvl phyloPeriods p) p
| lvl == 0 = clonePhyloLevel lvl p
| lvl == 1 = fisToPhyloLevel phyloFisFiltered p
| lvl > 1 = undefined
| otherwise = panic ("[ERR][Viz.Phylo.Example.updatePhyloByLevel] Level not defined")
instance AppendToPhylo Fis
where
--------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 1 = fisToPhyloLevel m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
instance AppendToPhylo Cluster
where
--------------------------------------
-- | appendByLevel :: Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
addPhyloLevel lvl m p = undefined
--------------------------------------
instance AppendToPhylo Document
where
--------------------------------------
-- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl < 0 = over (phylo_periods) (++ docsToPhyloPeriods lvl m p) p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> -1")
--------------------------------------
phyloWithGroupsm1 :: Phylo
phyloWithGroupsm1 = updatePhyloByLevel (-1) phylo
phylo0 :: Phylo
phylo0 = clonePhyloLevel 0 phylo
------------------------------------------------------------------------
-- | STEP 3 | -- Parse the Documents and group them by Periods
-- | To init a set of periods out of a given Grain and Step
docsToPeriods :: (Ord date, Enum date) => (doc -> date)
-> Grain -> Step -> [doc] -> Map (date, date) [doc]
docsToPeriods _ _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
docsToPeriods f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
where
--------------------------------------
hs = steps g s $ both f (head es, last es)
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
steps s' o' (start,end) = map (\l -> (head l, last l))
$ chunkAlong s' o' [start .. end]
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: PhyloNgrams -> [Document] -> [Document]
parseDocs l docs = map (\(Document d t)
-> Document d ( unwords
$ filter (\x -> Vector.elem x l)
$ monoTexts t)) docs
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: Grain -> Step -> [Document] -> Phylo -> Map (Date, Date) [Document]
groupDocsByPeriod g s docs p = docsToPeriods date g s $ parseDocs (getPhyloNgrams p) docs
phyloPeriods :: Map (Date, Date) [Document]
phyloPeriods = groupDocsByPeriod 5 3 phyloDocs phylo
------------------------------------------------------------------------
-- | STEP 2 | -- Init an initial list of Ngrams and a Phylo
-- | STEP 2 | -- Init a Phylo of level -1 with the Documents
phylo :: Phylo
phylo = initPhylo phyloDocs (initNgrams actants)
phylo = addPhyloLevel (-1) phyloDocs
$ initPhylo (keys phyloDocs) (initNgrams actants)
------------------------------------------------------------------------
-- | STEP 1 | -- Get a list of Document
-- | To transform a corpus of texts into a structured list of Documents
corpusToDocs :: [(Date, Text)] -> [Document]
corpusToDocs l = map (\(d,t) -> Document d t) l
-- | STEP 1 | -- Parse all the Documents and group them by Period
phyloDocs :: [Document]
phyloDocs = corpusToDocs corpus
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod 5 3 (corpusToDocs corpus) (initNgrams actants)
------------------------------------------------------------------------
......
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.LevelMaker
where
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, words, zip)
import Data.Map (Map, (!), empty, restrictKeys, filterWithKey, singleton)
import Data.Set (Set)
import Data.Text (Text, words)
import Data.Tuple.Extra
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
-- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
-- | To create a list of PhyloGroups based on a list of aggregates a
toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
instance PhyloLevelMaker Cluster
where
--------------------------------------
-- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl > 1 = toPhyloLevel lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
--------------------------------------
instance PhyloLevelMaker Fis
where
--------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 1 = toPhyloLevel lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis m p) $ zip [1..] l
--------------------------------------
instance PhyloLevelMaker Document
where
--------------------------------------
-- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl < 0 = toPhyloLevel lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> -1")
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p = map (\(idx,ngram) -> ngramsToGroup (d,d') lvl idx ngram [ngram] p)
$ zip [1..]
$ (nub . concat)
$ map (Text.words . text) l
--------------------------------------
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> Cluster -> Map (Date,Date) [Cluster] -> Phylo -> PhyloGroup
clusterToGroup prd lvl idx lbl groups m p =
PhyloGroup ((prd, lvl), idx) lbl ((sort . nub . concat) $ map getGroupNgrams groups) empty empty [] [] [] (map (\g -> (getGroupId g, 1)) groups)
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> (Clique,Support) -> Map (Date, Date) [Fis] -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ snd fis)) cooc [] [] [] []
where
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> ngramsToIdx x p)
$ Set.toList
$ fst fis
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = filterWithKey (\k _ -> elem (fst k) ngrams && elem (snd k) ngrams)
$ fisToCooc (restrictKeys m $ Set.fromList [prd]) p
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p =
PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> ngramsToIdx x p) ngrams) empty empty [] [] [] []
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
toPhyloLevel :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
toPhyloLevel lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over (phylo_periodLevels)
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.LinkMaker
where
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, words, zip, sortOn, head, null, tail, splitAt, (!!))
import Data.Map (Map)
import Data.Set (Set)
import Data.Tuple.Extra
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Maybe as Maybe
------------------------------------------------------------------------
-- | Make links from Level to Level
-- | To choose a LevelLink strategy based an a given Level
shouldLink :: (Level,Level) -> [Int] -> [Int] -> Bool
shouldLink (lvl,lvl') l l'
| lvl <= 1 = doesContainsOrd l l'
| lvl > 1 = undefined
| otherwise = panic ("[ERR][Viz.Phylo.Tools.shouldLink] LevelLink not defined")
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups :: (Level,Level) -> PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups (lvl,lvl') current targets
| lvl < lvl' = setLevelParents current
| lvl > lvl' = setLevelChilds current
| otherwise = current
where
--------------------------------------
setLevelChilds :: PhyloGroup -> PhyloGroup
setLevelChilds = over (phylo_groupLevelChilds) addPointers
--------------------------------------
setLevelParents :: PhyloGroup -> PhyloGroup
setLevelParents = over (phylo_groupLevelParents) addPointers
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target ->
if shouldLink (lvl,lvl')
(_phylo_groupNgrams current)
(_phylo_groupNgrams target )
then Just ((getGroupId target),1)
else Nothing) targets
--------------------------------------
-- | To set the LevelLinks between two lists of PhyloGroups
linkGroupsByLevel :: (Level,Level) -> Phylo -> [PhyloGroup] -> [PhyloGroup]
linkGroupsByLevel (lvl,lvl') p groups = map (\group ->
if getGroupLevel group == lvl
then linkGroupToGroups (lvl,lvl') group (getGroupsWithFilters lvl' (getGroupPeriod group) p)
else group) groups
-- | To set the LevelLink of all the PhyloGroups of a Phylo
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterPhyloGroups (linkGroupsByLevel (lvl,lvl') p) p
------------------------------------------------------------------------
-- | Make links from Period to Period
-- | To apply the corresponding proximity function based on a given Proximity
getProximity :: (Proximity,[Double]) -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
getProximity (prox,param) g1 g2 = case prox of
WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (param !! 0) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
_ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods :: PairTo -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to id l = case to of
Childs -> unNested id ((tail . snd) next)
Parents -> unNested id ((reverse . fst) next)
_ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PairTo type not defined")
where
--------------------------------------
next :: ([PhyloPeriodId], [PhyloPeriodId])
next = splitAt idx l
--------------------------------------
idx :: Int
idx = case (List.elemIndex id l) of
Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
Just i -> i
--------------------------------------
-- | To have an non-overlapping next period
unNested :: PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
unNested x l
| null l = []
| nested (fst $ head l) x = unNested x (tail l)
| nested (snd $ head l) x = unNested x (tail l)
| otherwise = l
--------------------------------------
nested :: Date -> PhyloPeriodId -> Bool
nested d prd = d >= fst prd && d <= snd prd
--------------------------------------
-- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
findBestCandidates :: PairTo -> Int -> Int -> Double -> (Proximity,[Double]) -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates to depth max thr (prox,param) group p
| depth > max || null next = []
| (not . null) best = take 2 best
| otherwise = findBestCandidates to (depth + 1) max thr (prox,param) group p
where
--------------------------------------
next :: [PhyloPeriodId]
next = getNextPeriods to (getGroupPeriod group) (getPhyloPeriods p)
--------------------------------------
candidates :: [PhyloGroup]
candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> getProximity (prox,param) group group') candidates
--------------------------------------
best :: [(PhyloGroupId, Double)]
best = reverse
$ sortOn snd
$ filter (\(id,score) -> score >= thr) scores
--------------------------------------
-- | To add a new list of Pointers into an existing Childs/Parents list of Pointers
makePair :: PairTo -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup
makePair to group ids = case to of
Childs -> over (phylo_groupPeriodChilds) addPointers group
Parents -> over (phylo_groupPeriodParents) addPointers group
_ -> panic ("[ERR][Viz.Phylo.Example.makePair] PairTo type not defined")
where
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers l = nub $ (l ++ ids)
--------------------------------------
-- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
pairGroupsToGroups :: PairTo -> Level -> Double -> (Proximity,[Double]) -> Phylo -> Phylo
pairGroupsToGroups to lvl thr (prox,param) p = alterPhyloGroups
(\groups ->
map (\group ->
if (getGroupLevel group) == lvl
then
let
--------------------------------------
candidates :: [(PhyloGroupId, Double)]
candidates = findBestCandidates to 1 5 thr (prox,param) group p
--------------------------------------
in
makePair to group candidates
else
group ) groups) p
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Metrics.Clustering
where
import Data.List (last,head,union,concat,null,nub,(++),init,tail)
import Data.Map (Map,elems,adjust,unionWith,intersectionWith)
import Data.Set (Set)
import Data.Tuple (fst, snd)
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp :: Int -> PhyloGroup -> PhyloGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp idx curr (nodes,edges) next memo
| null nodes' && null next' = memo'
| (not . null) next' = relatedComp idx (head next') (nodes',edges) (tail next') memo'
| otherwise = relatedComp (idx + 1) (head nodes') (tail nodes',edges) [] memo'
where
--------------------------------------
memo' :: [[PhyloGroup]]
memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
--------------------------------------
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Metrics.Proximity
where
import Data.List (last,head,union,concat,null)
import Data.Map (Map,elems,adjust,unionWith,intersectionWith)
import Data.Set (Set)
import Data.Tuple (fst, snd)
import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
-- | To process the weightedLogJaccard between two PhyloGroups fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
\ No newline at end of file
......@@ -13,14 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level)
import Data.List (filter, intersect, (++), sort, null, head, tail, last, tails, delete, nub)
import Data.Map (Map, mapKeys, member)
import Data.List (filter, intersect, (++), sort, null, head, tail, last, tails, delete, nub, concat, union)
import Data.Map (Map, mapKeys, member, elems, adjust)
import Data.Set (Set)
import Data.Text (Text, toLower)
import Data.Tuple.Extra
......@@ -264,10 +263,9 @@ initNgrams :: [Ngrams] -> PhyloNgrams
initNgrams l = Vector.fromList $ map toLower l
-- | To init a Phylomemy
initPhylo :: [Document] -> PhyloNgrams -> Phylo
initPhylo docs ngrams = Phylo (both date $ (last &&& head) docs) ngrams [] []
-- | To create a Phylo from a list of PhyloPeriods and Ngrams
initPhylo :: [(Date, Date)] -> PhyloNgrams -> Phylo
initPhylo l ngrams = Phylo ((fst . head) l, (snd . last) l) ngrams (map (\prd -> initPhyloPeriod prd []) l) []
-- | To create a PhyloLevel
initPhyloLevel :: PhyloLevelId -> [PhyloGroup] -> PhyloLevel
......@@ -319,14 +317,6 @@ setPhyloLevelId lvl' (PhyloLevel (id, lvl) groups)
groups' = over (traverse . phylo_groupId) (\((period, lvl), idx) -> ((period, lvl'), idx)) groups
-- | To choose a LevelLink strategy based an a given Level
shouldLink :: (Level,Level) -> [Int] -> [Int] -> Bool
shouldLink (lvl,lvl') l l'
| lvl <= 1 = doesContainsOrd l l'
| lvl > 1 = undefined
| otherwise = panic ("[ERR][Viz.Phylo.Tools.shouldLink] LevelLink not defined")
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment