Commit 09cf2917 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Pipeline] clustering with C++ Louvain bindings, ok.

parent 00344aaf
...@@ -22,17 +22,16 @@ import Gargantext.Core (Lang(FR)) ...@@ -22,17 +22,16 @@ import Gargantext.Core (Lang(FR))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex) import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
import Gargantext.Viz.Graph.Distances.Matrice (distributional) import Gargantext.Viz.Graph.Distances.Matrice (conditional)
import Gargantext.Text.Metrics.Occurrences (cooc, removeApax) import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms) import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain (bestpartition) import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Graph.Clustering.Louvain.Utils (map2graph)
pipeline path = do pipeline path = do
-- Text <- IO Text <- FilePath -- Text <- IO Text <- FilePath
text <- readFile path text <- readFile path
let contexts = splitBy (Sentences 3) text let contexts = splitBy (Sentences 3) text
myterms <- extractTerms Multi FR contexts myterms <- extractTerms Multi FR contexts
...@@ -40,11 +39,12 @@ pipeline path = do ...@@ -40,11 +39,12 @@ pipeline path = do
-- TODO groupBy (Stem | GroupList) -- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms let myCooc = removeApax $ cooc myterms
-- Cooc -> Matrix -- Cooc -> Matrix
let theScores = M.filter (/=0) $ score distributional myCooc let theScores = M.take 350 $ M.filter (>0) $ score conditional myCooc
let (ti, _) = createIndices theScores let (ti, _) = createIndices theScores
--
-- Matrix -> Clustering -> Graph -> JSON ---- -- Matrix -> Clustering -> Graph -> JSON
pure $ bestpartition False $ map2graph $ toIndex ti theScores ---- pure $ bestpartition False $ map2graph $ toIndex ti theScores
partitions <- cLouvain $ toIndex ti theScores
pure partitions
...@@ -60,8 +60,3 @@ $(deriveJSON (unPrefix "g_") ''Graph) ...@@ -60,8 +60,3 @@ $(deriveJSON (unPrefix "g_") ''Graph)
...@@ -89,8 +89,21 @@ type Matrix' a = Acc (Matrix a) ...@@ -89,8 +89,21 @@ type Matrix' a = Acc (Matrix a)
type InclusionExclusion = Double type InclusionExclusion = Double
type SpecificityGenericity = Double type SpecificityGenericity = Double
conditional :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional m = (run $ ie (use m), run $ sg (use m)) miniMax :: Matrix' Double -> Matrix' Double
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where
miniMax' = (the $ minimum $ maximum m)
conditional :: Matrix Int -> Matrix Double
conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
where
r :: Rank
r = rank' m
conditional' :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional' m = (run $ ie (use m), run $ sg (use m))
where where
ie :: Matrix' Double -> Matrix' Double ie :: Matrix' Double -> Matrix' Double
...@@ -115,14 +128,10 @@ conditional m = (run $ ie (use m), run $ sg (use m)) ...@@ -115,14 +128,10 @@ conditional m = (run $ ie (use m), run $ sg (use m))
-- | Distributional Distance -- | Distributional Distance
distributional :: Matrix Int -> Matrix Double distributional :: Matrix Int -> Matrix Double
distributional m = run $ filter $ ri (map fromIntegral $ use m) distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
where where
n = rank' m n = rank' m
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where
miniMax' = (the $ minimum $ maximum m)
filter m = zipWith (\a b -> max a b) m (transpose m) filter m = zipWith (\a b -> max a b) m (transpose m)
ri mat = zipWith (/) mat1 mat2 ri mat = zipWith (/) mat1 mat2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment