Commit 5741fc28 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Phylo] Reading cosmetics / New Tools file.

parent 69a8e0db
Pipeline #229 canceled with stage
...@@ -55,6 +55,7 @@ import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) ...@@ -55,6 +55,7 @@ import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Text.Terms.Mono (monoTexts) import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Prelude hiding (head) import Gargantext.Prelude hiding (head)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Types | -- -- | Types | --
...@@ -63,7 +64,7 @@ import Gargantext.Viz.Phylo ...@@ -63,7 +64,7 @@ import Gargantext.Viz.Phylo
-- | Date : a simple Integer -- | Date : a simple Integer
type Date = Int type Date = Int
-- | Document : a piece of Text linked to a Date -- | Document : a piece of Text linked to a Date
data Document = Document data Document = Document
{ date :: Date { date :: Date
, text :: Text , text :: Text
} deriving (Show) } deriving (Show)
...@@ -83,7 +84,7 @@ data Levels = Level_m1 | Level_0 | Level_1 | Level_2 | Level_N ...@@ -83,7 +84,7 @@ data Levels = Level_m1 | Level_0 | Level_1 | Level_2 | Level_N
data LinkLvlLabel = Link_m1_0 | Link_0_m1 | Link_1_0 | Link_0_1 | Link_x_y data LinkLvlLabel = Link_m1_0 | Link_0_m1 | Link_1_0 | Link_0_1 | Link_x_y
deriving (Show, Eq, Enum, Bounded) deriving (Show, Eq, Enum, Bounded)
data LinkLvl = LinkLvl data LinkLvl = LinkLvl
{ linkLvlLabel :: LinkLvlLabel { linkLvlLabel :: LinkLvlLabel
...@@ -116,17 +117,17 @@ appariement = undefined ...@@ -116,17 +117,17 @@ appariement = undefined
-- | STEP 9 | -- Link the PhyloGroups of level 1 through the Periods -- | STEP 9 | -- Link the PhyloGroups of level 1 through the Periods
shouldPair :: PhyloGroup -> PhyloGroup -> Bool shouldPair :: PhyloGroup -> PhyloGroup -> Bool
shouldPair g g' = (not . null) $ intersect (getNgrams g) (getNgrams g') shouldPair g g' = (not . null) $ intersect (getNgrams g) (getNgrams g')
getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int) getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
getKeyPair (x,y) m = case findPair (x,y) m of getKeyPair (x,y) m = case findPair (x,y) m of
Nothing -> panic "PhyloError" Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
Just i -> i Just i -> i
where where
-------------------------------------- --------------------------------------
findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int) findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
findPair (x,y) m findPair (x,y) m
| member (x,y) m = Just (x,y) | member (x,y) m = Just (x,y)
| member (y,x) m = Just (y,x) | member (y,x) m = Just (y,x)
| otherwise = Nothing | otherwise = Nothing
...@@ -136,11 +137,11 @@ listToCombi :: (a -> b) -> [a] -> [(b,b)] ...@@ -136,11 +137,11 @@ listToCombi :: (a -> b) -> [a] -> [(b,b)]
listToCombi f l = [(f x, f y) | (x:rest) <- tails l, y <- rest] listToCombi f l = [(f x, f y) | (x:rest) <- tails l, y <- rest]
fisToCooc :: Map (Date, Date) Fis -> Map (Int, Int) Double fisToCooc :: Map (Date, Date) Fis -> Map (Int, Int) Double
fisToCooc m = map (\v -> v/docs) $ fisToCooc m = map (/docs)
foldl (\mem x -> $ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
adjust (+1) (getKeyPair x mem) mem) cooc (concat (map (\x -> $ concat
listToCombi findIdx $ (Set.toList . fst) x) fis)) $ map (\x -> listToCombi findIdx $ (Set.toList . fst) x) fis
where where
-------------------------------------- --------------------------------------
fis :: [(Clique,Support)] fis :: [(Clique,Support)]
fis = concat $ map (\x -> Map.toList x) (elems m) fis = concat $ map (\x -> Map.toList x) (elems m)
...@@ -149,7 +150,7 @@ fisToCooc m = map (\v -> v/docs) $ ...@@ -149,7 +150,7 @@ fisToCooc m = map (\v -> v/docs) $
fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis fisNgrams = foldl (\mem x -> union mem $ (Set.toList . fst) x) [] fis
-------------------------------------- --------------------------------------
docs :: Double docs :: Double
docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis docs = fromIntegral $ foldl (\mem x -> mem + (snd x)) 0 fis
-------------------------------------- --------------------------------------
cooc :: Map (Int, Int) (Double) cooc :: Map (Int, Int) (Double)
cooc = Map.fromList $ map (\x -> (x,0)) (listToCombi findIdx fisNgrams) cooc = Map.fromList $ map (\x -> (x,0)) (listToCombi findIdx fisNgrams)
...@@ -179,19 +180,22 @@ phyloWithGroups1 :: Phylo ...@@ -179,19 +180,22 @@ phyloWithGroups1 :: Phylo
phyloWithGroups1 = updatePhyloByLevel Level_1 phyloLinked_m1_0 phyloWithGroups1 = updatePhyloByLevel Level_1 phyloLinked_m1_0
cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> PhyloGroup cliqueToGroup :: PhyloPeriodId -> Int -> Int -> Ngrams -> (Clique,Support) -> PhyloGroup
cliqueToGroup period lvl idx label fis = PhyloGroup ((period, lvl), idx) label (sort (map (\x -> cliqueToGroup period lvl idx label fis = PhyloGroup ((period, lvl), idx)
findIdx x) (Set.toList $ fst fis))) (singleton "support" (fromIntegral $ snd fis)) [] [] [] [] label
(sort $ map (\x -> findIdx x) $ Set.toList $ fst fis)
(singleton "support" (fromIntegral $ snd fis))
[] [] [] []
fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo fisToPhyloLevel :: Map (Date, Date) Fis -> Phylo -> Phylo
fisToPhyloLevel m p = over (phylo_periods . traverse) fisToPhyloLevel m p = over (phylo_periods . traverse)
(\period -> (\period ->
let periodId = _phylo_periodId period let periodId = _phylo_periodId period
fisList = zip [1..] (Map.toList (m ! periodId)) fisList = zip [1..] (Map.toList (m ! periodId))
in over (phylo_periodLevels) in over (phylo_periodLevels)
(\levels -> (\levels ->
let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis)) fisList let groups = map (\fis -> cliqueToGroup periodId 1 (fst fis) "" (snd fis)) fisList
in (PhyloLevel (periodId, 1) groups) : levels in (PhyloLevel (periodId, 1) groups) : levels
) period ) period
) p ) p
-- | To preserve nonempty periods from filtering, please use : filterFisBySupport False ... -- | To preserve nonempty periods from filtering, please use : filterFisBySupport False ...
...@@ -201,7 +205,7 @@ phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis) ...@@ -201,7 +205,7 @@ phyloFisFiltered = filterFisBySupport True 1 (filterFisByNested phyloFis)
filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis filterFisBySupport :: Bool -> Int -> Map (Date, Date) Fis -> Map (Date, Date) Fis
filterFisBySupport empty min m = case empty of filterFisBySupport empty min m = case empty of
True -> Map.map (\fis -> filterMinorFis min fis) m True -> Map.map (\fis -> filterMinorFis min fis) m
False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m False -> Map.map (\fis -> filterMinorFisNonEmpty min fis) m
filterMinorFis :: Int -> Fis -> Fis filterMinorFis :: Int -> Fis -> Fis
filterMinorFis min fis = Map.filter (\s -> s > min) fis filterMinorFis min fis = Map.filter (\s -> s > min) fis
...@@ -217,8 +221,8 @@ doesContains :: [Ngrams] -> [Ngrams] -> Bool ...@@ -217,8 +221,8 @@ doesContains :: [Ngrams] -> [Ngrams] -> Bool
doesContains l l' doesContains l l'
| null l' = True | null l' = True
| length l' > length l = False | length l' > length l = False
| elem (head l') l = doesContains l (tail l') | elem (head l') l = doesContains l (tail l')
| otherwise = False | otherwise = False
doesAnyContains :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> Bool doesAnyContains :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> Bool
doesAnyContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' ++ l) doesAnyContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' ++ l)
...@@ -226,14 +230,17 @@ doesAnyContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) ...@@ -226,14 +230,17 @@ doesAnyContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h))
filterNestedCliques :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> [Set Ngrams] filterNestedCliques :: Set Ngrams -> [Set Ngrams] -> [Set Ngrams] -> [Set Ngrams]
filterNestedCliques h l l' filterNestedCliques h l l'
| null l = if doesAnyContains h l l' | null l = if doesAnyContains h l l'
then l' then l'
else h : l' else h : l'
| doesAnyContains h l l' = filterNestedCliques (head l) (tail l) l' | doesAnyContains h l l' = filterNestedCliques (head l) (tail l) l'
| otherwise = filterNestedCliques (head l) (tail l) (h : l') | otherwise = filterNestedCliques (head l) (tail l) (h : l')
filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis filterFisByNested :: Map (Date, Date) Fis -> Map (Date, Date) Fis
filterFisByNested m = map (\fis -> restrictKeys fis (Set.fromList (filterNestedCliques (head (keys fis)) (keys fis) []))) m filterFisByNested = map (\fis -> restrictKeys fis
$ Set.fromList
$ filterNestedCliques (head (keys fis)) (keys fis) []
)
phyloFis :: Map (Date, Date) Fis phyloFis :: Map (Date, Date) Fis
phyloFis = termsToFis phyloTerms phyloFis = termsToFis phyloTerms
...@@ -272,22 +279,25 @@ addPointer field targetPointer current = ...@@ -272,22 +279,25 @@ addPointer field targetPointer current =
set field (<> targetPointer) current set field (<> targetPointer) current
getNgrams :: PhyloGroup -> [Int] getNgrams :: PhyloGroup -> [Int]
getNgrams g = _phylo_groupNgrams g getNgrams = _phylo_groupNgrams
getGroups :: Phylo -> [PhyloGroup] getGroups :: Phylo -> [PhyloGroup]
getGroups = view (phylo_periods . traverse . phylo_periodLevels . traverse . phylo_levelGroups) getGroups = view (phylo_periods . traverse . phylo_periodLevels . traverse . phylo_levelGroups)
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = view (phylo_groupId) getGroupId = view (phylo_groupId)
getGroupLvl :: PhyloGroup -> Int getGroupLvl :: PhyloGroup -> Int
getGroupLvl group = snd $ fst $ getGroupId group getGroupLvl = snd . fst . getGroupId
getGroupPeriod :: PhyloGroup -> (Date,Date) getGroupPeriod :: PhyloGroup -> (Date,Date)
getGroupPeriod group = fst $ fst $ getGroupId group getGroupPeriod = fst . fst . getGroupId
getGroupsByLevelAndPeriod :: Int -> (Date,Date) -> Phylo -> [PhyloGroup] getGroupsByLevelAndPeriod :: Int -> (Date,Date) -> Phylo -> [PhyloGroup]
getGroupsByLevelAndPeriod lvl period p = List.filter (\group -> (getGroupLvl group == lvl) && (getGroupPeriod group == period)) (getGroups p) getGroupsByLevelAndPeriod lvl period p = List.filter testGroup (getGroups p)
where
testGroup group = (getGroupLvl group == lvl )
&& (getGroupPeriod group == period)
containsIdx :: [Int] -> [Int] -> Bool containsIdx :: [Int] -> [Int] -> Bool
containsIdx l l' containsIdx l l'
...@@ -318,13 +328,14 @@ linkGroupToGroups lvl current targets ...@@ -318,13 +328,14 @@ linkGroupToGroups lvl current targets
setLevelParents = over (phylo_groupLevelParents) addPointers setLevelParents = over (phylo_groupLevelParents) addPointers
addPointers :: [Pointer] -> [Pointer] addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target -> if shouldLink lvl (_phylo_groupNgrams current) (_phylo_groupNgrams target) addPointers lp = lp ++ Maybe.mapMaybe (\target -> if shouldLink lvl (_phylo_groupNgrams current)
(_phylo_groupNgrams target )
then Just ((getGroupId target),1) then Just ((getGroupId target),1)
else Nothing else Nothing
) targets ) targets
addPointers' :: [Pointer] -> [Pointer] addPointers' :: [Pointer] -> [Pointer]
addPointers' lp = lp ++ map (\target -> ((getGroupId target),1)) targets addPointers' lp = lp ++ map (\target -> ((getGroupId target),1)) targets
linkGroupsByLevel :: LinkLvl -> Phylo -> [PhyloGroup] -> [PhyloGroup] linkGroupsByLevel :: LinkLvl -> Phylo -> [PhyloGroup] -> [PhyloGroup]
linkGroupsByLevel lvl p groups = map (\group -> linkGroupsByLevel lvl p groups = map (\group ->
...@@ -333,7 +344,13 @@ linkGroupsByLevel lvl p groups = map (\group -> ...@@ -333,7 +344,13 @@ linkGroupsByLevel lvl p groups = map (\group ->
else group ) groups else group ) groups
phyloToLinks :: LinkLvl -> Phylo -> Phylo phyloToLinks :: LinkLvl -> Phylo -> Phylo
phyloToLinks lvl p = over (phylo_periods . traverse . phylo_periodLevels . traverse . phylo_levelGroups) (\groups -> linkGroupsByLevel lvl p groups) p phyloToLinks lvl p = over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
)
(linkGroupsByLevel lvl p) p
phyloLinked_0_m1 :: Phylo phyloLinked_0_m1 :: Phylo
phyloLinked_0_m1 = phyloToLinks lvl_0_m1 phyloWithGroups0 phyloLinked_0_m1 = phyloToLinks lvl_0_m1 phyloWithGroups0
...@@ -376,7 +393,11 @@ findIdx n = case (elemIndex n (_phylo_ngrams phylo)) of ...@@ -376,7 +393,11 @@ findIdx n = case (elemIndex n (_phylo_ngrams phylo)) of
Just i -> i Just i -> i
ngramsToGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> PhyloGroup ngramsToGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> PhyloGroup
ngramsToGroup terms label idx lvl from to = PhyloGroup (((from, to), lvl), idx) label (sort (map (\x -> findIdx x) terms)) (Map.empty) [] [] [] [] ngramsToGroup terms label idx lvl from to = PhyloGroup (((from, to), lvl), idx)
label
(sort (map (\x -> findIdx x) terms))
(Map.empty)
[] [] [] []
docsToLevel :: (Date, Date) -> Corpus -> PhyloLevel docsToLevel :: (Date, Date) -> Corpus -> PhyloLevel
docsToLevel k v = PhyloLevel (k,(-1)) (map (\x -> docsToLevel k v = PhyloLevel (k,(-1)) (map (\x ->
......
{-| {-|
Module : Gargantext.Viz.Phylo.Tools Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy tools Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Phylo Toolbox:
- functions to build a Phylo
- functions to filter the cliques
- functions to manage a Phylo
Group Functions (TODO list)
- cohesion sur un groupe
- distance au dernier branchement
- âge du groupe
Futre Idea: temporal zoom on Phylo
phyloZoomOut :: (PeriodGrain, Phylo) -> [(PeriodGrain, Phylo)]
(from smallest granularity, it increases (zoom out) the periods of the Phylo)
Moral idea: viz from out to in
-} -}
...@@ -28,76 +14,8 @@ Moral idea: viz from out to in ...@@ -28,76 +14,8 @@ Moral idea: viz from out to in
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Phylo.Tools where module Gargantext.Viz.Phylo.Tools
where
import Data.Set (Set)
import Data.Map (Map)
import qualified Data.Map as Map
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Example
type MinSize = Int
-- | Building a phylo
-- (Indicative and schematic function)
-- buildPhylo :: Support -> MinSize
-- -> Map Clique Support -> Phylo
-- buildPhylo s m mcs = level2Phylo
-- . groups2level
-- . clusters2group
-- . Map.map clique2cluster
-- . filterCliques s m
level2Phylo :: PhyloLevel -> Phylo -> Phylo
level2Phylo = undefined
groups2level :: [PhyloGroup] -> PhyloLevel
groups2level = undefined
-- clusters2group :: [Cluster Ngrams] -> PhyloGroup
-- clusters2group = undefined
-- clique2cluster :: Clique -> Cluster Ngrams
-- clique2cluster = undefined
-- | Filtering the cliques before bulding the Phylo
-- (Support and MinSize as parameter of the finale function to build a phylo)
-- idea: log of Corpus size (of docs)
filterCliques :: Support -> MinSize
-> Map Clique Support -> [Clique]
filterCliques s ms = maximalCliques
. filterWithSizeSet ms
. Map.keys
. filterWithSupport s
-- | Hapaxify / Threshold
-- hapax s = 1
-- ?
filterWithSupport :: Support -> Map Clique Support -> Map Clique Support
filterWithSupport s = Map.filter (>s)
filterWithSizeSet :: MinSize -> [Clique] -> [Clique]
filterWithSizeSet = undefined
-- | filtre les cliques de ngrams compris dans une clique plus grande
-- /!\ optim inside
maximalCliques :: [Clique] -> [Clique]
maximalCliques = undefined
-- | Phylo management
-- | PhyloLevel Management
viewGroups :: (Start,End) -> PhyloLevel -> Phylo -> [PhyloGroup]
viewGroups = undefined
viewLevels :: (Start,End) -> Phylo -> [PhyloLevel]
viewLevels = undefined
-- | tous les terme des champs, tous les parents et les enfants
setGroup :: PhyloGroup -> PhyloGroup -> PhyloGroup
setGroup = undefined
--removeTerms :: recalculer les cliques pour ces termes
--addTerms
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment