Commit 47ed713f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PIPELINE] adding clustering louvain.

parent b76fc489
...@@ -16,26 +16,35 @@ module Gargantext.Pipeline ...@@ -16,26 +16,35 @@ module Gargantext.Pipeline
where where
import Data.Text.IO (readFile) import Data.Text.IO (readFile)
import qualified Data.Map.Strict as M
---------------------------------------------- ----------------------------------------------
import Gargantext.Core (Lang(FR)) import Gargantext.Core (Lang(FR))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score) import Gargantext.Viz.Graph.Index (score, createIndexes, toIndex)
import Gargantext.Viz.Graph.Distances.Matrice (distributional) import Gargantext.Viz.Graph.Distances.Matrice (distributional)
import Gargantext.Text.Metrics.Occurrences (cooc, removeApax) import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
import Gargantext.Text.Terms (TermType(Multi), extractTerms) import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain (bestpartition)
import Data.Graph.Clustering.Louvain.Utils (map2graph)
pipeline path = do pipeline path = do
-- Text <- IO Text <- FilePath -- Text <- IO Text <- FilePath
text <- readFile path text <- readFile path
let contexts = splitBy (Sentences 3) text let contexts = splitBy (Sentences 3) text
myterms <- extractTerms Multi FR contexts myterms <- extractTerms Multi FR contexts
-- TODO filter (\t -> not . elem t stopList) myterms -- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList) -- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms let myCooc = removeApax $ cooc myterms
-- Cooc -> Matrix -- Cooc -> Matrix
pure $ score distributional myCooc let theScores = M.filter (/=0) $ score distributional myCooc
let (ti, _) = createIndexes theScores
-- Matrix -> Clustering -> Graph -> JSON -- Matrix -> Clustering -> Graph -> JSON
pure $ bestpartition False $ map2graph $ toIndex ti theScores
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment