Commit 91e81646 authored by qlobbe's avatar qlobbe

to the level 1

parent 3f949532
Pipeline #543 failed with stage
......@@ -172,7 +172,7 @@ data Phylo =
, _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double)
, _phylo_param :: PhyloParam
, _phylo_periods :: [PhyloPeriod]
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod
}
deriving (Generic, Show, Eq)
......@@ -184,10 +184,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodId :: PhyloPeriodId
, _phylo_periodLevels :: [PhyloLevel]
}
deriving (Generic, Show, Eq)
PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq)
-- | Level : a level of clustering
......@@ -202,26 +201,41 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel =
PhyloLevel { _phylo_levelId :: PhyloLevelId
, _phylo_levelGroups :: [PhyloGroup]
}
PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
}
deriving (Generic, Show, Eq)
--------------------
-- | PhyloGroup | --
--------------------
type PhyloGroupId = (PhyloLevelId, Int)
type Index = Int
type PhyloGroupId = (PhyloLevelId, Index)
-- | BranchId : (a level, a sequence of branch index)
-- the sequence is a path of heritage from the most to the less specific branch
type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup =
PhyloGroup { _phylo_groupId :: PhyloGroupId
PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int
, _phylo_groupSupport :: Support
, _phylo_groupNgrams :: [Int]
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupBreakPointer :: Maybe Pointer
}
deriving (Generic, Show, Eq)
-- | Weight : A generic mesure that can be associated with an Id
type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
---------------------------
-- | Frequent Item Set | --
......
......@@ -35,6 +35,14 @@ import Control.Lens
import qualified Data.Vector as Vector
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1 :: Phylo
phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
---------------------------------------------
-- | STEP 2 | -- Build the frequent items set
---------------------------------------------
......
......@@ -16,7 +16,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, mapWithKey, toList, elems)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!))
import Data.Set (size)
import Data.Vector (Vector)
......@@ -29,7 +29,7 @@ import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Control.Lens
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.Set as Set
......@@ -41,11 +41,11 @@ import qualified Data.Set as Set
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = phyloBase
toPhylo docs lst conf = phylo1
where
--------------------------------------
_phylo1 :: Phylo
_phylo1 = toPhylo1 docs phyloBase
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
......@@ -58,15 +58,43 @@ toPhylo docs lst conf = phyloBase
--------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then
let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [(((pId,lvl),length groups),f obj pId lvl (length groups) (getRoots phylo))] ) [] phyloFis)
else
phyloLvl )
phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> PhyloGroup
fisToGroup fis pId lvl idx fdt =
PhyloGroup pId lvl idx
(fis ^. phyloFis_support)
(ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt)
(1,[])
[] [] [] []
Nothing
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = undefined
toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where
--------------------------------------
_mFis :: Map (Date,Date) [PhyloFis]
_mFis = toPhyloFis _docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase)
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase)
--------------------------------------
_docs' :: Map (Date,Date) [Document]
_docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
--------------------------------------
......@@ -108,19 +136,22 @@ filterFisByNested m =
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis]
toPhyloFis mDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested
$ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique)
$ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport)
$ traceFis "Unfiltered Fis" mFis
toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested
$ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique)
$ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport)
$ traceFis "Unfiltered Fis" phyloFis
where
--------------------------------------
-- | create the fis from the docs for each period
mFis :: Map (Date,Date) [PhyloFis]
mFis = mapWithKey (\prd docs -> let fis = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) prd) fis ) mDocs
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
--------------------------------------
......@@ -173,6 +204,11 @@ nbDocsByTime docs step =
$ unionWith (+) time docs'
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
-- | To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf =
......@@ -184,4 +220,4 @@ toPhyloBase docs lst conf =
(docsToCoocByYear docs (foundations ^. foundations_roots) conf)
(nbDocsByTime docs $ timeUnit conf)
params
(map (\prd -> PhyloPeriod prd []) periods)
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
......@@ -19,7 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn)
import Data.Set (size)
import Data.Map (Map, elems, fromList, unionWith)
import Data.Map (Map, elems, fromList, unionWith, keys)
import Data.String (String)
import Gargantext.Prelude
......@@ -150,8 +150,13 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst
$ map (\prd -> prd ^. phylo_periodId)
$ keys
$ phylo ^. phylo_periods
getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
\ No newline at end of file
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment