Merge branch 'pipeline'

a630946f · Alexandre Delanoë · 1ddff49f · 05848890 · a630946f · a630946f
Commit a630946f authored Jun 09, 2018 by Alexandre Delanoë
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 8 deletions

package.yaml package.yaml +1 -0

Metrics.hs src/Gargantext/Text/Metrics.hs +14 -8

stack.yaml stack.yaml +2 -0

No files found.
--- a/package.yaml
+++ b/package.yaml
@@ -68,6 +68,7 @@ library:
  - hlcm
  - ini
  - jose-jwt
+  - kmeans-vector
  - lens
  - logging-effect
  - matrix

--- a/src/Gargantext/Text/Metrics.hs
+++ b/src/Gargantext/Text/Metrics.hs
@@ -24,15 +24,17 @@ module Gargantext.Text.Metrics
 import Data.Text (Text, pack)
 import Data.Map (Map)
 import qualified Data.List as L
 import qualified Data.Map  as M
 import qualified Data.Set  as S
 import qualified Data.Text as T
+import qualified Data.Vector as V
+import qualified Data.Vector.Unboxed as VU
 import Data.Tuple.Extra (both)
 --import GHC.Real (Ratio)
 --import qualified Data.Text.Metrics as DTM
 import Data.Array.Accelerate (toList)
+import Math.KMeans (kmeans, euclidSq, elements)
 import Gargantext.Prelude
@@ -61,17 +63,21 @@ import GHC.Real (round)
 type ListSize  = Int
 type BinSize = Double
+-- Map list creation
+-- K-means split into 2 main clusters using the Inclusion/Exclusion (relevance) score
+-- Sample the main cluster, ordered by specificity/genericity, in s parts
+-- Each part is then ordered by Inclusion/Exclusion
+-- Take n scored terms in each part, where n * s = l
 takeSome :: Ord t => ListSize -> BinSize -> [Scored t] -> [Scored t]
 takeSome l s scores = L.take l
                    $ takeSample n m
-                    $ takeKmeans l'
+                    $ splitKmeans 2 scores
-                    $ L.reverse $ L.sortOn _scored_incExc scores
  where
-    -- TODO : KMEAN split into 2 main clusters 
+    -- (TODO: benchmark with accelerate-example kmeans version)
-    -- (advice: use accelerate-example kmeans version 
+    splitKmeans x xs = elements
-    --  and maybe benchmark it to be sure)
+                     $ V.head
-    takeKmeans = L.take
+                     $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
-    l' = 4000
+                              euclidSq x xs
    n = round ((fromIntegral l)/s)
    m = round $ (fromIntegral $ length scores) / (s)
    takeSample n m xs = L.concat $ map (L.take n)

--- a/stack.yaml
+++ b/stack.yaml
@@ -23,6 +23,8 @@ extra-deps:
 - fullstop-0.1.4
 - haskell-src-exts-1.18.2
 - http-types-0.12.1
+- kmeans-vector-0.3.2
+- probable-0.1.3
 - protolude-0.2
 - servant-0.13
 - servant-auth-0.3.0.1