Commit 47ed713f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PIPELINE] adding clustering louvain.

parent b76fc489
......@@ -16,26 +16,35 @@ module Gargantext.Pipeline
where
import Data.Text.IO (readFile)
import qualified Data.Map.Strict as M
----------------------------------------------
import Gargantext.Core (Lang(FR))
import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score)
import Gargantext.Viz.Graph.Index (score, createIndexes, toIndex)
import Gargantext.Viz.Graph.Distances.Matrice (distributional)
import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
import Gargantext.Text.Terms (TermType(Multi), extractTerms)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain (bestpartition)
import Data.Graph.Clustering.Louvain.Utils (map2graph)
pipeline path = do
-- Text <- IO Text <- FilePath
text <- readFile path
let contexts = splitBy (Sentences 3) text
myterms <- extractTerms Multi FR contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms
-- Cooc -> Matrix
pure $ score distributional myCooc
let theScores = M.filter (/=0) $ score distributional myCooc
let (ti, _) = createIndexes theScores
-- Matrix -> Clustering -> Graph -> JSON
pure $ bestpartition False $ map2graph $ toIndex ti theScores
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment