Commit 0712ec42 authored by Quentin Lobbé's avatar Quentin Lobbé

Add cooc matrix out of Fis

parent fa76819d
...@@ -28,15 +28,23 @@ TODO: ...@@ -28,15 +28,23 @@ TODO:
module Gargantext.Viz.Phylo.Example where module Gargantext.Viz.Phylo.Example where
import Control.Lens hiding (both) import Control.Lens hiding (both)
import Data.List (concat, union, intersect, tails, tail, head, last, null, zip, sort, length, any, (++), nub)
import qualified Data.List as List import qualified Data.List as List
import Data.Text (Text, unwords, toLower, words) import Data.Text (Text, unwords, toLower, words)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Semigroup (Semigroup) import Data.Semigroup (Semigroup)
import Data.Map (Map)
import Data.Map (Map, elems, member, adjust, singleton, (!), keys, restrictKeys, mapWithKey)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Vector (Vector, fromList, elemIndex)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Maybe as Maybe import qualified Data.Maybe as Maybe
import Data.Tuple (fst, snd)
import qualified Data.Tuple as Tuple import qualified Data.Tuple as Tuple
import Data.Bool (Bool, not)
import qualified Data.Bool as Bool import qualified Data.Bool as Bool
import Data.Set (Set) import Data.Set (Set)
...@@ -45,7 +53,7 @@ import qualified Data.Matrix as DM' ...@@ -45,7 +53,7 @@ import qualified Data.Matrix as DM'
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Text.Terms.Mono (monoTexts) import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Prelude import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -107,6 +115,51 @@ appariement = undefined ...@@ -107,6 +115,51 @@ appariement = undefined
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 9 | -- Link the PhyloGroups of level 1 through the Periods -- | STEP 9 | -- Link the PhyloGroups of level 1 through the Periods
shouldPair :: PhyloGroup -> PhyloGroup -> Bool
shouldPair g g' = (not . null) $ intersect (getNgrams g) (getNgrams g')
getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
getKeyPair (x,y) m = case findPair (x,y) m of
Nothing -> panic "PhyloError"
Just i -> i
where
--------------------------------------
findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
findPair (x,y) m
| member (x,y) m = Just (x,y)
| member (y,x) m = Just (y,x)
| otherwise = Nothing
--------------------------------------
listToCombi :: (a -> b) -> [a] -> [(b,b)]
listToCombi f l = [(f x, f y) | (x:rest) <- tails l, y <- rest]
fisToCooc :: Map (Date, Date) Fis -> Map (Int, Int) Double
fisToCooc m = map (\v -> v/docs) $
foldl (\mem x ->
adjust (+1) (getKeyPair x mem) mem) cooc (concat (map (\x ->
listToCombi findIdx $ (Set.toList . fst) x) fis))
where
--------------------------------------
fis :: [(Clique,Support)]
fis = concat $ map (\x -> Map.toList x) (elems m)
--------------------------------------
fisNgrams :: [Ngrams]
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
--------------------------------------
docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToCombi findIdx fisNgrams)
--------------------------------------
phyloWithAppariement1 :: Phylo
phyloWithAppariement1 = phyloLinked_0_1
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 8 | -- Find the Fis out of Documents and Ngrams and build level 1 of the Phylo -- | STEP 8 | -- Find the Fis out of Documents and Ngrams and build level 1 of the Phylo
...@@ -126,18 +179,19 @@ phyloWithGroups1 :: Phylo ...@@ -126,18 +179,19 @@ phyloWithGroups1 :: Phylo
phyloWithGroups1 = updatePhyloByLevel Level_1 phyloLinked_m1_0 phyloWithGroups1 = updatePhyloByLevel Level_1 phyloLinked_m1_0
cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> PhyloGroup
cliqueToGroup period lvl idx label fis = PhyloGroup ((period, lvl), idx) label (List.sort (map (\x -> findIdx x) (Set.toList $ Tuple.fst fis))) (Map.singleton "support" (fromIntegral $ Tuple.snd fis)) [] [] [] [] cliqueToGroup period lvl idx label fis = PhyloGroup ((period, lvl), idx) label (sort (map (\x ->
findIdx x) (Set.toList $ fst fis))) (singleton "support" (fromIntegral $ snd fis)) [] [] [] []
fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
fisToPhyloLevel m p = over (phylo_periods . traverse) fisToPhyloLevel m p = over (phylo_periods . traverse)
(\period -> (\period ->
let periodId = _phylo_periodId period let periodId = _phylo_periodId period
fisList = zip [1..] (Map.toList (m Map.! periodId)) fisList = zip [1..] (Map.toList (m ! periodId))
in over (phylo_periodLevels) in over (phylo_periodLevels)
(\levels -> (\levels ->
let groups = map (\fis -> cliqueToGroup periodId 1 (Tuple.fst fis) "" (Tuple.snd fis)) fisList let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis)) fisList
in (PhyloLevel (periodId, 1) groups) : levels in (PhyloLevel (periodId, 1) groups) : levels
) period ) period
) p ) p
-- | To preserve nonempty periods from filtering, please use : filterFisBySupport False ... -- | To preserve nonempty periods from filtering, please use : filterFisBySupport False ...
...@@ -153,7 +207,7 @@ filterMinorFis :: Int -> Fis -> Fis ...@@ -153,7 +207,7 @@ filterMinorFis :: Int -> Fis -> Fis
filterMinorFis min fis = Map.filter (\s -> s > min) fis filterMinorFis min fis = Map.filter (\s -> s > min) fis
filterMinorFisNonEmpty :: Int -> Fis -> Fis filterMinorFisNonEmpty :: Int -> Fis -> Fis
filterMinorFisNonEmpty min fis = if (Map.null fis') && (Bool.not $ Map.null fis) filterMinorFisNonEmpty min fis = if (Map.null fis') && (not $ Map.null fis)
then filterMinorFisNonEmpty (min - 1) fis then filterMinorFisNonEmpty (min - 1) fis
else fis' else fis'
where where
...@@ -161,25 +215,25 @@ filterMinorFisNonEmpty min fis = if (Map.null fis') && (Bool.not $ Map.null fis) ...@@ -161,25 +215,25 @@ filterMinorFisNonEmpty min fis = if (Map.null fis') && (Bool.not $ Map.null fis)
doesContains :: [Ngrams] -> [Ngrams] -> Bool doesContains :: [Ngrams] -> [Ngrams] -> Bool
doesContains l l' doesContains l l'
| List.null l' = True | null l' = True
| List.length l' > List.length l = False | length l' > length l = False
| List.elem (List.head l') l = doesContains l (List.tail l') | elem (head l') l = doesContains l (tail l')
| otherwise = False | otherwise = False
doesAnyContains :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> Bool doesAnyContains :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> Bool
doesAnyContains h l l' = List.any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' List.++ l) doesAnyContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' ++ l)
filterNestedCliques :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> [Set Ngrams] filterNestedCliques :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> [Set Ngrams]
filterNestedCliques h l l' filterNestedCliques h l l'
| List.null l = if doesAnyContains h l l' | null l = if doesAnyContains h l l'
then l' then l'
else h : l' else h : l'
| doesAnyContains h l l' = filterNestedCliques (List.head l) (List.tail l) l' | doesAnyContains h l l' = filterNestedCliques (head l) (tail l) l'
| otherwise = filterNestedCliques (List.head l) (List.tail l) (h : l') | otherwise = filterNestedCliques (head l) (tail l) (h : l')
filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
filterFisByNested m = Map.map(\fis -> Map.restrictKeys fis (Set.fromList (filterNestedCliques (List.head (Map.keys fis)) (Map.keys fis) []))) m filterFisByNested m = map (\fis -> restrictKeys fis (Set.fromList (filterNestedCliques (head (keys fis)) (keys fis) []))) m
phyloFis :: Map (Date, Date) Fis phyloFis :: Map (Date, Date) Fis
phyloFis = termsToFis phyloTerms phyloFis = termsToFis phyloTerms
...@@ -193,7 +247,7 @@ termsToFis = corpusToFis (words . text) ...@@ -193,7 +247,7 @@ termsToFis = corpusToFis (words . text)
corpusToFis :: (Document -> [Ngrams]) corpusToFis :: (Document -> [Ngrams])
-> Map (Date, Date) [Document] -> Map (Date, Date) [Document]
-> Map (Date, Date) (Map (Set Ngrams) Int) -> Map (Date, Date) (Map (Set Ngrams) Int)
corpusToFis f = Map.map (\d -> fisWithSizePolyMap (Segment 1 20) 1 (map f d)) corpusToFis f = map (\d -> fisWithSizePolyMap (Segment 1 20) 1 (map f d))
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -217,6 +271,9 @@ addPointer :: Semigroup field ...@@ -217,6 +271,9 @@ addPointer :: Semigroup field
addPointer field targetPointer current = addPointer field targetPointer current =
set field (<> targetPointer) current set field (<> targetPointer) current
getNgrams :: PhyloGroup -> [Int]
getNgrams g = _phylo_groupNgrams g
getGroups :: Phylo -> [PhyloGroup] getGroups :: Phylo -> [PhyloGroup]
getGroups = view (phylo_periods . traverse . phylo_periodLevels . traverse . phylo_levelGroups) getGroups = view (phylo_periods . traverse . phylo_periodLevels . traverse . phylo_levelGroups)
...@@ -224,20 +281,20 @@ getGroupId :: PhyloGroup -> PhyloGroupId ...@@ -224,20 +281,20 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = view (phylo_groupId) getGroupId = view (phylo_groupId)
getGroupLvl :: PhyloGroup -> Int getGroupLvl :: PhyloGroup -> Int
getGroupLvl group = Tuple.snd $ Tuple.fst $ getGroupId group getGroupLvl group = snd $ fst $ getGroupId group
getGroupPeriod :: PhyloGroup -> (Date,Date) getGroupPeriod :: PhyloGroup -> (Date,Date)
getGroupPeriod group = Tuple.fst $ Tuple.fst $ getGroupId group getGroupPeriod group = fst $ fst $ getGroupId group
getGroupsByLevelAndPeriod :: Int -> (Date,Date) -> Phylo -> [PhyloGroup] getGroupsByLevelAndPeriod :: Int -> (Date,Date) -> Phylo -> [PhyloGroup]
getGroupsByLevelAndPeriod lvl period p = List.filter (\group -> (getGroupLvl group == lvl) && (getGroupPeriod group == period)) (getGroups p) getGroupsByLevelAndPeriod lvl period p = List.filter (\group -> (getGroupLvl group == lvl) && (getGroupPeriod group == period)) (getGroups p)
containsIdx :: [Int] -> [Int] -> Bool containsIdx :: [Int] -> [Int] -> Bool
containsIdx l l' containsIdx l l'
| List.null l' = False | null l' = False
| List.last l < List.head l' = False | last l < head l' = False
| List.head l' `List.elem` l = True | head l' `elem` l = True
| otherwise = containsIdx l (List.tail l') | otherwise = containsIdx l (tail l')
shouldLink :: LinkLvl -> [Int] -> [Int] -> Bool shouldLink :: LinkLvl -> [Int] -> [Int] -> Bool
shouldLink lvl current target = case linkLvlLabel lvl of shouldLink lvl current target = case linkLvlLabel lvl of
...@@ -261,13 +318,13 @@ linkGroupToGroups lvl current targets ...@@ -261,13 +318,13 @@ linkGroupToGroups lvl current targets
setLevelParents = over (phylo_groupLevelParents) addPointers setLevelParents = over (phylo_groupLevelParents) addPointers
addPointers :: [Pointer] -> [Pointer] addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp List.++ Maybe.mapMaybe (\target -> if shouldLink lvl (_phylo_groupNgrams current) (_phylo_groupNgrams target) addPointers lp = lp ++ Maybe.mapMaybe (\target -> if shouldLink lvl (_phylo_groupNgrams current) (_phylo_groupNgrams target)
then Just ((getGroupId target),1) then Just ((getGroupId target),1)
else Nothing else Nothing
) targets ) targets
addPointers' :: [Pointer] -> [Pointer] addPointers' :: [Pointer] -> [Pointer]
addPointers' lp = lp List.++ map (\target -> ((getGroupId target),1)) targets addPointers' lp = lp ++ map (\target -> ((getGroupId target),1)) targets
linkGroupsByLevel :: LinkLvl -> Phylo -> [PhyloGroup] -> [PhyloGroup] linkGroupsByLevel :: LinkLvl -> Phylo -> [PhyloGroup] -> [PhyloGroup]
linkGroupsByLevel lvl p groups = map (\group -> linkGroupsByLevel lvl p groups = map (\group ->
...@@ -300,7 +357,7 @@ setPhyloLevel lvl (PhyloLevel (periodId, lvl') lvlGroups) ...@@ -300,7 +357,7 @@ setPhyloLevel lvl (PhyloLevel (periodId, lvl') lvlGroups)
lvlGroups' = map (\g -> setGroupIdLvl lvl g) lvlGroups lvlGroups' = map (\g -> setGroupIdLvl lvl g) lvlGroups
copyPhyloLevel :: Int -> [PhyloLevel] -> [PhyloLevel] copyPhyloLevel :: Int -> [PhyloLevel] -> [PhyloLevel]
copyPhyloLevel lvl l = (setPhyloLevel lvl (List.head l)) : l copyPhyloLevel lvl l = (setPhyloLevel lvl (head l)) : l
alterLvl :: Int -> [PhyloPeriod] -> [PhyloPeriod] alterLvl :: Int -> [PhyloPeriod] -> [PhyloPeriod]
alterLvl lvl l = map (\p -> PhyloPeriod (_phylo_periodId p) (copyPhyloLevel lvl $ _phylo_periodLevels p)) l alterLvl lvl l = map (\p -> PhyloPeriod (_phylo_periodId p) (copyPhyloLevel lvl $ _phylo_periodLevels p)) l
...@@ -314,30 +371,30 @@ phyloWithGroups0 = updatePhyloByLevel Level_0 phyloWithGroupsm1 ...@@ -314,30 +371,30 @@ phyloWithGroups0 = updatePhyloByLevel Level_0 phyloWithGroupsm1
findIdx :: Ngrams -> Int findIdx :: Ngrams -> Int
findIdx n = case (Vector.elemIndex n (_phylo_ngrams phylo)) of findIdx n = case (elemIndex n (_phylo_ngrams phylo)) of
Nothing -> panic "PhyloError" Nothing -> panic "PhyloError"
Just i -> i Just i -> i
ngramsToGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> PhyloGroup ngramsToGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> PhyloGroup
ngramsToGroup terms label idx lvl from to = PhyloGroup (((from, to), lvl), idx) label (List.sort (map (\x -> findIdx x) terms)) (Map.empty) [] [] [] [] ngramsToGroup terms label idx lvl from to = PhyloGroup (((from, to), lvl), idx) label (sort (map (\x -> findIdx x) terms)) (Map.empty) [] [] [] []
docsToLevel :: (Date, Date) -> Corpus -> PhyloLevel docsToLevel :: (Date, Date) -> Corpus -> PhyloLevel
docsToLevel k v = PhyloLevel (k,(-1)) (map (\x -> docsToLevel k v = PhyloLevel (k,(-1)) (map (\x ->
ngramsToGroup [Tuple.snd x] (Tuple.snd x) (Tuple.fst x) (-1) (Tuple.fst k) (Tuple.snd k) ngramsToGroup [snd x] (snd x) (fst x) (-1) (fst k) (snd k)
) $ zip [1..] $ (List.nub . List.concat) $ map (words . text) v) ) $ zip [1..] $ (nub . concat) $ map (words . text) v)
corpusToPhyloPeriod :: Map (Date,Date) Corpus -> [PhyloPeriod] corpusToPhyloPeriod :: Map (Date,Date) Corpus -> [PhyloPeriod]
corpusToPhyloPeriod corpus = map (\x -> PhyloPeriod (Tuple.fst x) [(Tuple.snd x)]) $ zip (Map.keys mapLvl) (Map.elems mapLvl) corpusToPhyloPeriod corpus = map (\x -> PhyloPeriod (fst x) [(snd x)]) $ zip (keys mapLvl) (elems mapLvl)
where where
mapLvl :: Map (Date,Date) PhyloLevel mapLvl :: Map (Date,Date) PhyloLevel
mapLvl = Map.mapWithKey docsToLevel corpus mapLvl = mapWithKey docsToLevel corpus
updatePhyloByLevel :: Levels -> Phylo -> Phylo updatePhyloByLevel :: Levels -> Phylo -> Phylo
updatePhyloByLevel lvl (Phylo pDuration pNgrams pPeriods) updatePhyloByLevel lvl (Phylo pDuration pNgrams pPeriods)
= case lvl of = case lvl of
Level_m1 -> Phylo pDuration pNgrams pPeriods' Level_m1 -> Phylo pDuration pNgrams pPeriods'
where pPeriods' = (corpusToPhyloPeriod phyloTerms) List.++ pPeriods where pPeriods' = (corpusToPhyloPeriod phyloTerms) ++ pPeriods
Level_0 -> Phylo pDuration pNgrams pPeriods' Level_0 -> Phylo pDuration pNgrams pPeriods'
where pPeriods' = alterLvl 0 pPeriods where pPeriods' = alterLvl 0 pPeriods
...@@ -362,7 +419,7 @@ toPeriodes :: (Ord date, Enum date) => (doc -> date) ...@@ -362,7 +419,7 @@ toPeriodes :: (Ord date, Enum date) => (doc -> date)
toPeriodes _ _ _ [] = panic "Empty corpus can not have any periods" toPeriodes _ _ _ [] = panic "Empty corpus can not have any periods"
toPeriodes f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs toPeriodes f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
where where
hs = steps g s $ both f (List.head es, List.last es) hs = steps g s $ both f (head es, last es)
-------------------------------------------------------------------- --------------------------------------------------------------------
-- | Define overlapping periods of time by following regular steps -- | Define overlapping periods of time by following regular steps
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
...@@ -371,18 +428,18 @@ toPeriodes f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs ...@@ -371,18 +428,18 @@ toPeriodes f g s es = Map.fromList $ zip hs $ map (inPeriode f es) hs
-------------------------------------------------------------------- --------------------------------------------------------------------
-- | Find steps of linear and homogenous time of integer -- | Find steps of linear and homogenous time of integer
steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)] steps :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
steps s' o' (start,end) = map (\l -> (List.head l, List.last l)) steps s' o' (start,end) = map (\l -> (head l, last l))
$ chunkAlong s' o' [start .. end] $ chunkAlong s' o' [start .. end]
cleanCorpus :: MapList -> Corpus -> Corpus cleanCorpus :: MapList -> Corpus -> Corpus
cleanCorpus ml = map (\(Document d t) -> Document d (unwords $ filter (\x -> elem x ml) $ monoTexts t)) cleanCorpus ml = map (\(Document d t) -> Document d (unwords $ filter (\x -> elem x ml) $ monoTexts t))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 2 | -- Find some Ngrams (ie: phyloGroup of level -1) out of the Corpus & init the phylo -- | STEP 2 | -- Find some Ngrams (ie: phyloGroup of level -1) out of the Corpus & init the phylo
phylo = Phylo (both date $ (List.last &&& List.head) phyloCorpus) phyloNgrams [] phylo = Phylo (both date $ (last &&& head) phyloCorpus) phyloNgrams []
phyloNgrams :: PhyloNgrams phyloNgrams :: PhyloNgrams
phyloNgrams = Vector.fromList cleanedActants phyloNgrams = Vector.fromList cleanedActants
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment