Commit a630946f authored by Alexandre Delanoë

Merge branch 'pipeline'

parents 1ddff49f 05848890
......@@ -68,6 +68,7 @@ library:
- hlcm
- ini
- jose-jwt
- kmeans-vector
- lens
- logging-effect
- matrix
......
......@@ -24,15 +24,17 @@ module Gargantext.Text.Metrics
import Data.Text (Text, pack)
import Data.Map (Map)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList)
import Math.KMeans (kmeans, euclidSq, elements)
import Gargantext.Prelude
......@@ -61,17 +63,21 @@ import GHC.Real (round)
type ListSize = Int
type BinSize = Double
-- Map list creation
-- K-means split into 2 main clusters on Inclusion/Exclusion (relevance score)
-- Sample the main cluster, ordered by specificity/genericity, in s parts
-- each part is then ordered by Inclusion/Exclusion
-- take n scored terms in each part, where n * s = l
takeSome :: Ord t => ListSize -> BinSize -> [Scored t] -> [Scored t]
takeSome l s scores = L.take l
$ takeSample n m
$ takeKmeans l'
$ L.reverse $ L.sortOn _scored_incExc scores
$ splitKmeans 2 scores
where
-- TODO : KMEAN split into 2 main clusters
-- (advice: use accelerate-example kmeans version
-- and maybe benchmark it to be sure)
takeKmeans = L.take
l' = 4000
-- (TODO: benchmark with accelerate-example kmeans version)
splitKmeans x xs = elements
$ V.head
$ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
euclidSq x xs
n = round ((fromIntegral l)/s)
m = round $ (fromIntegral $ length scores) / (s)
takeSample n m xs = L.concat $ map (L.take n)
......
......@@ -23,6 +23,8 @@ extra-deps:
- fullstop-0.1.4
- haskell-src-exts-1.18.2
- http-types-0.12.1
- kmeans-vector-0.3.2
- probable-0.1.3
- protolude-0.2
- servant-0.13
- servant-auth-0.3.0.1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment