Commit 4ee73701 authored by qlobbe's avatar qlobbe Committed by Alexandre Delanoë

new strategy for choosing temporal matching candidates

parent 2120f449
...@@ -278,7 +278,7 @@ main = do ...@@ -278,7 +278,7 @@ main = do
pure $ toPhylo (setConfig config phyloWithoutLink) pure $ toPhylo (setConfig config phyloWithoutLink)
else do else do
printIOMsg "Reconstruct the phylo from scratch" printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus mapList config phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink) pure $ toPhylo (setConfig config phyloWithoutLink)
......
...@@ -37,7 +37,6 @@ import Data.Text (Text, pack) ...@@ -37,7 +37,6 @@ import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Utils.Prefix (unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -362,8 +361,8 @@ data Document = Document ...@@ -362,8 +361,8 @@ data Document = Document
-- | The Foundations of a Phylo created from a given TermList -- | The Foundations of a Phylo created from a given TermList
data PhyloFoundations = PhyloFoundations data PhyloFoundations = PhyloFoundations
{ _foundations_roots :: !(Vector Ngrams) { _foundations_roots :: (Vector Ngrams)
, _foundations_mapList :: TermList , _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloFoundations where instance ToSchema PhyloFoundations where
......
...@@ -86,8 +86,8 @@ phylo2dot2json phylo = do ...@@ -86,8 +86,8 @@ phylo2dot2json phylo = do
flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
flowPhyloAPI config cId = do flowPhyloAPI config cId = do
(mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId (_, corpus) <- corpusIdtoDocuments (timeUnit config) cId
phyloWithCliques <- pure $ toPhyloWithoutLink corpus mapList config phyloWithCliques <- pure $ toPhyloWithoutLink corpus config
-- writePhylo phyloWithCliquesFile phyloWithCliques -- writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques) pure $ toPhylo (setConfig config phyloWithCliques)
......
...@@ -19,8 +19,8 @@ import Control.Lens ...@@ -19,8 +19,8 @@ import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort) import Data.List (sortOn, nub, sort)
import Data.Map (Map) import Data.Map (Map)
import Data.Vector (Vector)
import Data.Text (Text, toLower) import Data.Text (Text, toLower)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts) import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport import Gargantext.Core.Viz.Phylo.PhyloExport
...@@ -31,6 +31,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching) ...@@ -31,6 +31,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map
--------------------------------- ---------------------------------
-- | STEP 5 | -- Export the phylo -- | STEP 5 | -- Export the phylo
...@@ -62,6 +63,7 @@ flatPhylo = case (getSeaElevation emptyPhylo) of ...@@ -62,6 +63,7 @@ flatPhylo = case (getSeaElevation emptyPhylo) of
emptyPhylo' :: Phylo emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1 emptyPhylo' = scanSimilarity 1
$ joinRootsToGroups
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo $ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
--------------------------------------------- ---------------------------------------------
...@@ -83,7 +85,7 @@ docsByPeriods = groupDocsByPeriod date periods docs ...@@ -83,7 +85,7 @@ docsByPeriods = groupDocsByPeriod date periods docs
emptyPhylo :: Phylo emptyPhylo :: Phylo
emptyPhylo = initPhylo docs mapList config emptyPhylo = initPhylo docs config
phyloCooc :: Map Date Cooc phyloCooc :: Map Date Cooc
...@@ -120,7 +122,11 @@ docs = map (\(d,t) ...@@ -120,7 +122,11 @@ docs = map (\(d,t)
foundations :: PhyloFoundations foundations :: PhyloFoundations
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList foundations = PhyloFoundations roots Map.empty
roots :: Vector Ngrams
roots = Vector.fromList $ map toLower actants
-------------------------------------------- --------------------------------------------
...@@ -128,8 +134,8 @@ foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList ...@@ -128,8 +134,8 @@ foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
-------------------------------------------- --------------------------------------------
mapList :: TermList -- mapList :: TermList
mapList = map (\a -> ([toLower a],[])) actants -- mapList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams] actants :: [Ngrams]
......
...@@ -25,7 +25,6 @@ import Prelude (floor) ...@@ -25,7 +25,6 @@ import Prelude (floor)
import Gargantext.Core.Methods.Similarities (Similarity(Conditional)) import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..)) import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
...@@ -203,13 +202,28 @@ indexDates' m = map (\docs -> ...@@ -203,13 +202,28 @@ indexDates' m = map (\docs ->
in (f,l)) m in (f,l)) m
-- create a map of roots and group ids
joinRootsToGroups :: Phylo -> Phylo
joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo
where
--------------------------------------
rootsMap :: Map Int [PhyloGroupId]
rootsMap = fromListWith (++)
$ concat -- flatten
$ map (\g ->
map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
$ getGroupsFromScale 1 phylo
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering -- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink -- tophylowithoutLink
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of toPhyloWithoutLink docs conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) Constante _ _ -> joinRootsToGroups
Adaptative _ -> scanSimilarity 1 $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> joinRootsToGroups
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase) $ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where where
-------------------------------------- --------------------------------------
...@@ -221,7 +235,7 @@ toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of ...@@ -221,7 +235,7 @@ toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = initPhylo docs lst conf phyloBase = initPhylo docs conf
-------------------------------------- --------------------------------------
--------------------------- ---------------------------
...@@ -409,9 +423,10 @@ initPhyloScales lvlMax pId = ...@@ -409,9 +423,10 @@ initPhyloScales lvlMax pId =
-- Init the basic elements of a Phylo -- Init the basic elements of a Phylo
-- --
initPhylo :: [Document] -> TermList -> PhyloConfig -> Phylo initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs lst conf = initPhylo docs conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let roots = Vector.fromList $ nub $ concat $ map text docs
foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf } params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
......
...@@ -316,6 +316,9 @@ ngramsToCooc ngrams coocs = ...@@ -316,6 +316,9 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex) getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
getGroupNgrams :: PhyloGroup -> [Int]
getGroupNgrams g = g ^. phylo_groupNgrams
idToPrd :: PhyloGroupId -> Period idToPrd :: PhyloGroupId -> Period
idToPrd id = (fst . fst) id idToPrd id = (fst . fst) id
...@@ -427,6 +430,9 @@ setConfig config phylo = phylo ...@@ -427,6 +430,9 @@ setConfig config phylo = phylo
getRoots :: Phylo -> Vector Ngrams getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
getSources :: Phylo -> Vector Text getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources) getSources phylo = _sources (phylo ^. phylo_sources)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment