Commit 4ee73701 authored by qlobbe's avatar qlobbe Committed by Alexandre Delanoë

new strategy for choosing temporal matching candidates

parent 2120f449
......@@ -278,7 +278,7 @@ main = do
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus mapList config
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
......
......@@ -37,7 +37,6 @@ import Data.Text (Text, pack)
import Data.Vector (Vector)
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude
......@@ -362,8 +361,8 @@ data Document = Document
-- | The Foundations of a Phylo created from a given TermList
data PhyloFoundations = PhyloFoundations
{ _foundations_roots :: !(Vector Ngrams)
, _foundations_mapList :: TermList
{ _foundations_roots :: (Vector Ngrams)
, _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups
} deriving (Generic, Show, Eq)
instance ToSchema PhyloFoundations where
......
......@@ -86,8 +86,8 @@ phylo2dot2json phylo = do
flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
flowPhyloAPI config cId = do
(mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId
phyloWithCliques <- pure $ toPhyloWithoutLink corpus mapList config
(_, corpus) <- corpusIdtoDocuments (timeUnit config) cId
phyloWithCliques <- pure $ toPhyloWithoutLink corpus config
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
......
......@@ -19,8 +19,8 @@ import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort)
import Data.Map (Map)
import Data.Vector (Vector)
import Data.Text (Text, toLower)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport
......@@ -31,6 +31,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching)
import Gargantext.Prelude
import qualified Data.Vector as Vector
import qualified Data.Set as Set
import qualified Data.Map as Map
---------------------------------
-- | STEP 5 | -- Export the phylo
......@@ -62,6 +63,7 @@ flatPhylo = case (getSeaElevation emptyPhylo) of
emptyPhylo' :: Phylo
emptyPhylo' = scanSimilarity 1
$ joinRootsToGroups
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
---------------------------------------------
......@@ -83,7 +85,7 @@ docsByPeriods = groupDocsByPeriod date periods docs
emptyPhylo :: Phylo
emptyPhylo = initPhylo docs mapList config
emptyPhylo = initPhylo docs config
phyloCooc :: Map Date Cooc
......@@ -120,7 +122,11 @@ docs = map (\(d,t)
foundations :: PhyloFoundations
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
foundations = PhyloFoundations roots Map.empty
roots :: Vector Ngrams
roots = Vector.fromList $ map toLower actants
--------------------------------------------
......@@ -128,8 +134,8 @@ foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
--------------------------------------------
mapList :: TermList
mapList = map (\a -> ([toLower a],[])) actants
-- mapList :: TermList
-- mapList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams]
......
......@@ -25,7 +25,6 @@ import Prelude (floor)
import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
......@@ -203,13 +202,28 @@ indexDates' m = map (\docs ->
in (f,l)) m
-- create a map of roots and group ids
joinRootsToGroups :: Phylo -> Phylo
joinRootsToGroups phylo = set (phylo_foundations . foundations_rootsInGroups) rootsMap phylo
where
--------------------------------------
rootsMap :: Map Int [PhyloGroupId]
rootsMap = fromListWith (++)
$ concat -- flatten
$ map (\g ->
map (\n -> (n,[getGroupId g])) $ _phylo_groupNgrams g)
$ getGroupsFromScale 1 phylo
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> scanSimilarity 1
toPhyloWithoutLink :: [Document] -> PhyloConfig -> Phylo
toPhyloWithoutLink docs conf = case (getSeaElevation phyloBase) of
Constante _ _ -> joinRootsToGroups
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> joinRootsToGroups
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
......@@ -221,7 +235,7 @@ toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
--------------------------------------
phyloBase :: Phylo
phyloBase = initPhylo docs lst conf
phyloBase = initPhylo docs conf
--------------------------------------
---------------------------
......@@ -409,9 +423,10 @@ initPhyloScales lvlMax pId =
-- Init the basic elements of a Phylo
--
initPhylo :: [Document] -> TermList -> PhyloConfig -> Phylo
initPhylo docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf =
let roots = Vector.fromList $ nub $ concat $ map text docs
foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
......
......@@ -316,6 +316,9 @@ ngramsToCooc ngrams coocs =
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupScale), g ^. phylo_groupIndex)
getGroupNgrams :: PhyloGroup -> [Int]
getGroupNgrams g = g ^. phylo_groupNgrams
idToPrd :: PhyloGroupId -> Period
idToPrd id = (fst . fst) id
......@@ -427,6 +430,9 @@ setConfig config phylo = phylo
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment