Commit 5508d716 authored by Quentin Lobbé's avatar Quentin Lobbé

Refactor level -1 and 0 and start linking -1 to 0

parent e134e1b8
...@@ -116,7 +116,7 @@ type PhyloLevelId = (PhyloPeriodId, Int) ...@@ -116,7 +116,7 @@ type PhyloLevelId = (PhyloPeriodId, Int)
data PhyloGroup = data PhyloGroup =
PhyloGroup { _phylo_groupId :: PhyloGroupId PhyloGroup { _phylo_groupId :: PhyloGroupId
, _phylo_groupLabel :: Text , _phylo_groupLabel :: Text
, _phylo_groupNgrams :: [NgramsId] , _phylo_groupNgrams :: [Int]
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
......
...@@ -67,18 +67,17 @@ type PeriodeSize = Int ...@@ -67,18 +67,17 @@ type PeriodeSize = Int
-- data Periodes b a = Map (b,b) a -- data Periodes b a = Map (b,b) a
type Occurrences = Int type Occurrences = Int
-------------------------------------------------------------------- data Levels = Level_m1 | Level_0 | Level_1 | Level_2 | Level_N
deriving (Show, Eq, Enum, Bounded)
data LinkLevels = Link_m1To0 | Link_0To1 | Link_mxTox
deriving (Show, Eq, Enum, Bounded)
data PhyloError = LevelDoesNotExist data PhyloError = LevelDoesNotExist
| LevelUnassigned | LevelUnassigned
deriving (Show) deriving (Show)
data PhyloField = PhyloField {
phyloField_id :: Int
}
-------------------------------------------------------------------- --------------------------------------------------------------------
phyloExampleFinal :: Phylo phyloExampleFinal :: Phylo
phyloExampleFinal = undefined phyloExampleFinal = undefined
...@@ -87,29 +86,20 @@ phyloExampleFinal = undefined ...@@ -87,29 +86,20 @@ phyloExampleFinal = undefined
appariement :: Map (Date, Date) (Map (Set Ngrams) Int) appariement :: Map (Date, Date) (Map (Set Ngrams) Int)
appariement = undefined appariement = undefined
--------------------------------------------------------------------
fisToFields :: Fis
-> [PhyloField]
fisToFields = undefined
phyloClusters :: Map (Date,Date) [PhyloField]
phyloClusters = undefined
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 8 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo -- | STEP 10 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 7 | -- Link the PhyloGroups of level 1 through the Periods -- | STEP 9 | -- Link the PhyloGroups of level 1 through the Periods
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 6 | -- Cluster the Fis and buil level 1 of the Phylo -- | STEP 8 | -- Cluster the Fis and buil level 1 of the Phylo
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 5 | -- Find the Fis out of Documents and Ngrams and build level 1 of the Phylo -- | STEP 7 | -- Find the Fis out of Documents and Ngrams and build level 1 of the Phylo
phyloFis :: Map (Date, Date) Fis phyloFis :: Map (Date, Date) Fis
...@@ -128,52 +118,91 @@ corpusToFis f = Map.map (\d -> fisWithSizePolyMap (Segment 1 20) 1 (map f d)) ...@@ -128,52 +118,91 @@ corpusToFis f = Map.map (\d -> fisWithSizePolyMap (Segment 1 20) 1 (map f d))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | STEP 4 | -- Build level -1 and 0 of the Phylo -- | STEP 6 | -- Link level 0 to level -1
addPointer :: Semigroup field addPointer :: Semigroup field
=> ASetter source target identity (field -> field) => ASetter current target identity (field -> field)
-> field -> source -> target -> field -> current -> target
addPointer field targetPointer source = addPointer field targetPointer current =
set field (<> targetPointer) source set field (<> targetPointer) current
containsIdx :: [Int] -> [Int] -> Bool
containsIdx l l'
| List.null l' = False
| List.last l < List.head l' = False
| List.head l' `List.elem` l = True
| otherwise = containsIdx l (List.tail l')
shouldLink :: LinkLevels -> PhyloGroup -> PhyloGroup -> Bool
shouldLink lvl current target = case lvl of
Link_m1To0 -> containsIdx (_phylo_groupNgrams target) (_phylo_groupNgrams current)
Link_0To1 -> containsIdx (_phylo_groupNgrams target) (_phylo_groupNgrams current)
Link_mxTox -> undefined
_ -> panic ("error link level to be defined")
------------------------------------------------------------------------
-- | STEP 5 | -- Build level 0 (for the moment it's just a durty copy of level -1)
setGroupIdLvl :: Int -> PhyloGroup -> PhyloGroup
setGroupIdLvl lvl (PhyloGroup ((period, lvl'), idx) gLabel gNgrams gPP gPC gLP gLC)
= PhyloGroup ((period, lvl), idx) gLabel gNgrams gPP gPC gLP gLC
setPhyloLevel :: Int -> PhyloLevel -> PhyloLevel
setPhyloLevel lvl (PhyloLevel (periodId, lvl') lvlGroups)
= PhyloLevel (periodId, lvl) lvlGroups'
where
lvlGroups' = map (\g -> setGroupIdLvl lvl g) lvlGroups
copyPhyloLevel :: Int -> [PhyloLevel] -> [PhyloLevel]
copyPhyloLevel lvl l = (setPhyloLevel lvl (List.head l)) : l
alterLvl :: Int -> [PhyloPeriod] -> [PhyloPeriod]
alterLvl lvl l = map (\p -> PhyloPeriod (_phylo_periodId p) (copyPhyloLevel lvl $ _phylo_periodLevels p)) l
phyloWithGroups0 :: Phylo
phyloWithGroups0 = updatePhyloByLevel Level_0 phyloWithGroupsm1
alterLvl :: PhyloGroup -> PhyloGroup
alterLvl g = g {_phylo_groupId = ((Tuple.fst $ Tuple.fst $ _phylo_groupId g, 0), Tuple.snd $ _phylo_groupId g)}
alterLvl' :: PhyloGroup -> PhyloGroup ------------------------------------------------------------------------
alterLvl' (PhyloGroup ((dates, _lvl), ix) gLabel gNgrams gPeriodParents gPeriodChilds gLevelParent gLevelChilds) -- | STEP 4 | -- Build level -1
= PhyloGroup gId' gLabel gNgrams' gPeriodParents gPeriodChilds gLevelParent gLevelChilds
where
gId' = ((dates, 0), ix)
gNgrams' = gNgrams
-- | for the moment level 0 is just a copy of level -1
--level0PhyloGroups :: [PhyloGroup]
--level0PhyloGroups = map alterLvl initPhyloGroups
findIdx :: Ngrams -> Int findIdx :: Ngrams -> Int
findIdx n = case (Vector.elemIndex n phyloNgrams) of findIdx n = case (Vector.elemIndex n (_phylo_ngrams phylo)) of
Nothing -> panic "PhyloError" Nothing -> panic "PhyloError"
Just i -> i Just i -> i
ngramsToGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> PhyloGroup ngramsToGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> PhyloGroup
ngramsToGroup terms label idx lvl from to = PhyloGroup (((from, to), lvl), idx) label (map (\x -> findIdx x) terms) [] [] [] [] ngramsToGroup terms label idx lvl from to = PhyloGroup (((from, to), lvl), idx) label (List.sort (map (\x -> findIdx x) terms)) [] [] [] []
docsToGroups :: (Date, Date) -> Corpus -> [PhyloGroup] docsToLevel :: (Date, Date) -> Corpus -> PhyloLevel
docsToGroups k v = map (\x -> docsToLevel k v = PhyloLevel (k,(-1)) (map (\x ->
ngramsToGroup [Tuple.snd x] (Tuple.snd x) (Tuple.fst x) (-1) (Tuple.fst k) (Tuple.snd k) ngramsToGroup [Tuple.snd x] (Tuple.snd x) (Tuple.fst x) (-1) (Tuple.fst k) (Tuple.snd k)
) $ zip [1..] $ (List.nub . List.concat) $ map (words . text) v ) $ zip [1..] $ (List.nub . List.concat) $ map (words . text) v)
data Levels = Level_m1 | Level_0 | Level_1 | Level_2 | Level_N corpusToPhyloPeriod :: Map (Date,Date) Corpus -> [PhyloPeriod]
deriving (Show, Eq, Enum, Bounded) corpusToPhyloPeriod corpus = map (\x -> PhyloPeriod (Tuple.fst x) [(Tuple.snd x)]) $ zip (Map.keys mapLvl) (Map.elems mapLvl)
where
toPhyloGroups :: Levels -> Map (Date,Date) Corpus -> [PhyloGroup] mapLvl :: Map (Date,Date) PhyloLevel
toPhyloGroups lvl corpus = case lvl of mapLvl = Map.mapWithKey docsToLevel corpus
Level_m1 -> List.concat $ Map.elems $ Map.mapWithKey docsToGroups corpus
_ -> panic ("error phylo to be defined") updatePhyloByLevel :: Levels -> Phylo -> Phylo
updatePhyloByLevel lvl (Phylo pDuration pNgrams pPeriods)
= case lvl of
-- | aka: level -1
initPhyloGroups :: [PhyloGroup] Level_m1 -> Phylo pDuration pNgrams pPeriods'
initPhyloGroups = toPhyloGroups Level_m1 phyloTerms where pPeriods' = (corpusToPhyloPeriod phyloTerms) List.++ pPeriods
Level_0 -> Phylo pDuration pNgrams pPeriods'
where pPeriods' = alterLvl 0 pPeriods
_ -> panic ("error level to be defined")
phyloWithGroupsm1 :: Phylo
phyloWithGroupsm1 = updatePhyloByLevel Level_m1 phylo
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -208,7 +237,7 @@ cleanCorpus ml = map (\(Document d t) -> Document d (unwords $ filter (\x -> ele ...@@ -208,7 +237,7 @@ cleanCorpus ml = map (\(Document d t) -> Document d (unwords $ filter (\x -> ele
-- | STEP 2 | -- Find some Ngrams (ie: phyloGroup of level -1) out of the Corpus & init the phylo -- | STEP 2 | -- Find some Ngrams (ie: phyloGroup of level -1) out of the Corpus & init the phylo
-- phylo = Phylo (both date $ (List.last &&& List.head) phyloCorpus) (initPhyloNgrams cleanedActants) undefined phylo = Phylo (both date $ (List.last &&& List.head) phyloCorpus) phyloNgrams []
phyloNgrams :: PhyloNgrams phyloNgrams :: PhyloNgrams
phyloNgrams = Vector.fromList cleanedActants phyloNgrams = Vector.fromList cleanedActants
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment