Commit 91e81646 authored by qlobbe's avatar qlobbe

to the level 1

parent 3f949532
Pipeline #543 failed with stage
...@@ -172,7 +172,7 @@ data Phylo = ...@@ -172,7 +172,7 @@ data Phylo =
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: [PhyloPeriod] , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -184,10 +184,9 @@ type PhyloPeriodId = (Date,Date) ...@@ -184,10 +184,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy -- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity -- levels: levels of granularity
data PhyloPeriod = data PhyloPeriod =
PhyloPeriod { _phylo_periodId :: PhyloPeriodId PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodLevels :: [PhyloLevel] , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} } deriving (Generic, Show, Eq)
deriving (Generic, Show, Eq)
-- | Level : a level of clustering -- | Level : a level of clustering
...@@ -202,26 +201,41 @@ type PhyloLevelId = (PhyloPeriodId,Level) ...@@ -202,26 +201,41 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis) -- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis) -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel = data PhyloLevel =
PhyloLevel { _phylo_levelId :: PhyloLevelId PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelGroups :: [PhyloGroup] , _phylo_levelLevel :: Level
} , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
}
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
-------------------- type PhyloGroupId = (PhyloLevelId, Int)
-- | PhyloGroup | --
--------------------
type Index = Int -- | BranchId : (a level, a sequence of branch index)
type PhyloGroupId = (PhyloLevelId, Index) -- the sequence is a path of heritage from the most to the less specific branch
type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period -- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup = data PhyloGroup =
PhyloGroup { _phylo_groupId :: PhyloGroupId PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int
, _phylo_groupSupport :: Support
, _phylo_groupNgrams :: [Int]
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupBreakPointer :: Maybe Pointer
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
-- | Weight : A generic mesure that can be associated with an Id
type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
--------------------------- ---------------------------
-- | Frequent Item Set | -- -- | Frequent Item Set | --
......
...@@ -35,6 +35,14 @@ import Control.Lens ...@@ -35,6 +35,14 @@ import Control.Lens
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1 :: Phylo
phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
--------------------------------------------- ---------------------------------------------
-- | STEP 2 | -- Build the frequent items set -- | STEP 2 | -- Build the frequent items set
--------------------------------------------- ---------------------------------------------
......
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++)) import Data.List (concat, nub, partition, sort, (++))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, mapWithKey, toList, elems) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!))
import Data.Set (size) import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -29,7 +29,7 @@ import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) ...@@ -29,7 +29,7 @@ import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Control.DeepSeq (NFData) import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace) import Debug.Trace (trace)
import Control.Lens import Control.Lens hiding (Level)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -41,11 +41,11 @@ import qualified Data.Set as Set ...@@ -41,11 +41,11 @@ import qualified Data.Set as Set
toPhylo :: [Document] -> TermList -> Config -> Phylo toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = phyloBase toPhylo docs lst conf = phylo1
where where
-------------------------------------- --------------------------------------
_phylo1 :: Phylo phylo1 :: Phylo
_phylo1 = toPhylo1 docs phyloBase phylo1 = toPhylo1 docs phyloBase
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf phyloBase = toPhyloBase docs lst conf
...@@ -58,15 +58,43 @@ toPhylo docs lst conf = phyloBase ...@@ -58,15 +58,43 @@ toPhylo docs lst conf = phyloBase
-------------------- --------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then
let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [(((pId,lvl),length groups),f obj pId lvl (length groups) (getRoots phylo))] ) [] phyloFis)
else
phyloLvl )
phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup
fisToGroup fis pId lvl idx fdt =
PhyloGroup pId lvl idx
(fis ^. phyloFis_support)
(ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt)
(1,[])
[] [] [] []
Nothing
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = undefined toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where where
-------------------------------------- --------------------------------------
_mFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloFis]
_mFis = toPhyloFis _docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase) phyloFis = toPhyloFis docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase)
-------------------------------------- --------------------------------------
_docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
_docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
-------------------------------------- --------------------------------------
...@@ -108,19 +136,22 @@ filterFisByNested m = ...@@ -108,19 +136,22 @@ filterFisByNested m =
-- | To transform a time map of docs innto a time map of Fis with some filters -- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis] toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis]
toPhyloFis mDocs support clique = traceFis "Filtered Fis" toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested $ filterFisByNested
$ traceFis "Filtered by clique size" $ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique) $ filterFis True clique (filterFisByClique)
$ traceFis "Filtered by support" $ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport) $ filterFis True support (filterFisBySupport)
$ traceFis "Unfiltered Fis" mFis $ traceFis "Unfiltered Fis" phyloFis
where where
-------------------------------------- --------------------------------------
-- | create the fis from the docs for each period phyloFis :: Map (Date,Date) [PhyloFis]
mFis :: Map (Date,Date) [PhyloFis] phyloFis =
mFis = mapWithKey (\prd docs -> let fis = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) prd) fis ) mDocs in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
-------------------------------------- --------------------------------------
...@@ -173,6 +204,11 @@ nbDocsByTime docs step = ...@@ -173,6 +204,11 @@ nbDocsByTime docs step =
$ unionWith (+) time docs' $ unionWith (+) time docs'
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
-- | To init the basic elements of a Phylo -- | To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf = toPhyloBase docs lst conf =
...@@ -184,4 +220,4 @@ toPhyloBase docs lst conf = ...@@ -184,4 +220,4 @@ toPhyloBase docs lst conf =
(docsToCoocByYear docs (foundations ^. foundations_roots) conf) (docsToCoocByYear docs (foundations ^. foundations_roots) conf)
(nbDocsByTime docs $ timeUnit conf) (nbDocsByTime docs $ timeUnit conf)
params params
(map (\prd -> PhyloPeriod prd []) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
...@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where ...@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn) import Data.List (sort, concat, null, union, (++), tails, sortOn)
import Data.Set (size) import Data.Set (size)
import Data.Map (Map, elems, fromList, unionWith) import Data.Map (Map, elems, fromList, unionWith, keys)
import Data.String (String) import Data.String (String)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -150,8 +150,13 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc' ...@@ -150,8 +150,13 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getPeriodIds :: Phylo -> [(Date,Date)] getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst getPeriodIds phylo = sortOn fst
$ map (\prd -> prd ^. phylo_periodId) $ keys
$ phylo ^. phylo_periods $ phylo ^. phylo_periods
getConfig :: Phylo -> Config getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
\ No newline at end of file
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment