Commit 09cf2917 authored by Alexandre Delanoë

[Pipeline] clustering with C++ Louvain bindings, ok.

parent 00344aaf
......@@ -22,17 +22,16 @@ import Gargantext.Core (Lang(FR))
import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
import Gargantext.Viz.Graph.Distances.Matrice (distributional)
import Gargantext.Viz.Graph.Distances.Matrice (conditional)
import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain (bestpartition)
import Data.Graph.Clustering.Louvain.Utils (map2graph)
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- | Text-to-clusters pipeline for the file at @path@: read the text, split it
-- into contexts, extract terms, build a co-occurrence score map, index it and
-- hand it to a Louvain clustering.
--
-- NOTE(review): this listing looks like a unified diff whose +/- markers were
-- lost, so the removed and the added variant of a changed line sit back to
-- back (the two @readFile@ lines, the two @theScores@ bindings, the two
-- endings). Confirm which variant is live before relying on it.
pipeline path = do
-- Text <- IO Text <- FilePath
text <- readFile path
text <- readFile path
-- Contexts are produced by @splitBy (Sentences 3)@.
let contexts = splitBy (Sentences 3) text
-- Term extraction is called with the @Multi@ term type and the @FR@ language.
myterms <- extractTerms Multi FR contexts
......@@ -40,11 +39,12 @@ pipeline path = do
-- TODO groupBy (Stem | GroupList)
-- Co-occurrences of the extracted terms, passed through @removeApax@.
let myCooc = removeApax $ cooc myterms
-- Cooc -> Matrix
-- Variant 1: @distributional@ scores, entries equal to 0 dropped.
let theScores = M.filter (/=0) $ score distributional myCooc
-- Variant 2: @conditional@ scores, only strictly positive entries kept, then
-- truncated with @M.take 350@.
-- NOTE(review): if @M@ is Data.Map, @take@ keeps the 350 smallest keys, not
-- the 350 highest scores; confirm this is the intended cut.
let theScores = M.take 350 $ M.filter (>0) $ score conditional myCooc
-- Only the first component returned by @createIndices@ is used.
let (ti, _) = createIndices theScores
-- Matrix -> Clustering -> Graph -> JSON
-- Variant 1: pure result of @bestpartition False@ applied to the @map2graph@
-- conversion of the indexed scores.
pure $ bestpartition False $ map2graph $ toIndex ti theScores
---- -- Matrix -> Clustering -> Graph -> JSON
---- pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- Variant 2: run @cLouvain@ (imported from the CplusPlus Louvain module) on
-- the indexed scores and return its result unchanged.
partitions <- cLouvain $ toIndex ti theScores
pure partitions
......@@ -60,8 +60,3 @@ $(deriveJSON (unPrefix "g_") ''Graph)
......@@ -89,8 +89,21 @@ type Matrix' a = Acc (Matrix a)
-- | Alias for 'Double'; element type of the first matrix in the pair returned
-- by the two-matrix conditional function.
type InclusionExclusion = Double
-- | Alias for 'Double'; element type of the second matrix in that pair.
type SpecificityGenericity = Double
-- | Evaluates @ie@ and @sg@ on the same input matrix (each through its own
-- @run@) and returns both results as a pair.
--
-- NOTE(review): another @conditional@ with a different type appears further
-- down, and an identically-bodied @conditional'@ as well; this one is
-- presumably the removed side of the diff. Confirm before keeping both.
conditional :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional m = (run $ ie (use m), run $ sg (use m))
-- | Thresholds a matrix: every element strictly greater than @miniMax'@ is
-- kept unchanged, every other element (including those equal to the
-- threshold) is replaced by 0.
miniMax :: Matrix' Double -> Matrix' Double
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
-- Threshold used above, derived from @m@ by @maximum@, then @minimum@, then
-- @the@.
-- NOTE(review): presumably these are the Accelerate folds (maximum along the
-- innermost dimension, then the minimum of those maxima); confirm.
-- NOTE(review): the @where@ keyword that should introduce this binding is not
-- visible in this listing; it was likely lost in extraction.
miniMax' = (the $ minimum $ maximum m)
-- | Converts the 'Int' matrix to 'Double' with @map fromIntegral@, applies
-- @proba r@, then @miniMax@, and evaluates the whole expression with @run@.
--
-- NOTE(review): @proba@ and @rank'@ are defined outside this listing; their
-- exact semantics should be checked there.
conditional :: Matrix Int -> Matrix Double
conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
-- @r@ is @rank' m@, passed as the first argument of @proba@.
-- NOTE(review): the @where@ keyword that should introduce these bindings is
-- not visible in this listing; it was likely lost in extraction.
r :: Rank
r = rank' m
-- | Evaluates @ie@ and @sg@ on the same input matrix (each through its own
-- @run@) and returns the two resulting matrices as a pair, in that order.
conditional' :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional' m = (run $ ie (use m), run $ sg (use m))
ie :: Matrix' Double -> Matrix' Double
......@@ -115,14 +128,10 @@ conditional m = (run $ ie (use m), run $ sg (use m))
-- | Distributional Distance
distributional :: Matrix Int -> Matrix Double
distributional m = run $ filter $ ri (map fromIntegral $ use m)
distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
n = rank' m
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
miniMax' = (the $ minimum $ maximum m)
filter m = zipWith (\a b -> max a b) m (transpose m)
ri mat = zipWith (/) mat1 mat2
