Commit 8255d8e6 authored by Alexandre Delanoë

[STAT] map list creation adding kmeans to splitBy inclusion score.

parent bfe7d274
...@@ -68,6 +68,7 @@ library: ...@@ -68,6 +68,7 @@ library:
- hlcm - hlcm
- ini - ini
- jose-jwt - jose-jwt
- kmeans-vector
- lens - lens
- logging-effect - logging-effect
- matrix - matrix
......
...@@ -24,15 +24,17 @@ module Gargantext.Text.Metrics ...@@ -24,15 +24,17 @@ module Gargantext.Text.Metrics
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.List as L import qualified Data.List as L
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
--import GHC.Real (Ratio) --import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM --import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList) import Data.Array.Accelerate (toList)
import Math.KMeans (kmeans, euclidSq, elements)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -64,14 +66,11 @@ type BinSize = Double ...@@ -64,14 +66,11 @@ type BinSize = Double
takeSome :: Ord t => ListSize -> BinSize -> [Scored t] -> [Scored t] takeSome :: Ord t => ListSize -> BinSize -> [Scored t] -> [Scored t]
takeSome l s scores = L.take l takeSome l s scores = L.take l
$ takeSample n m $ takeSample n m
$ takeKmeans l' $ takeKmeans 2 scores
$ L.reverse $ L.sortOn _scored_incExc scores
where where
-- TODO : KMEAN split into 2 main clusters -- TODO : KMEAN split into 2 main clusters
-- (advice: use accelerate-example kmeans version -- (TODO: benchmark with accelerate-example kmeans version)
-- and maybe benchmark it to be sure) takeKmeans x xs = elements $ V.head $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)])) euclidSq x xs
takeKmeans = L.take
l' = 4000
n = round ((fromIntegral l)/s) n = round ((fromIntegral l)/s)
m = round $ (fromIntegral $ length scores) / (s) m = round $ (fromIntegral $ length scores) / (s)
takeSample n m xs = L.concat $ map (L.take n) takeSample n m xs = L.concat $ map (L.take n)
......
...@@ -23,6 +23,8 @@ extra-deps: ...@@ -23,6 +23,8 @@ extra-deps:
- fullstop-0.1.4 - fullstop-0.1.4
- haskell-src-exts-1.18.2 - haskell-src-exts-1.18.2
- http-types-0.12.1 - http-types-0.12.1
- kmeans-vector-0.3.2
- probable-0.1.3
- protolude-0.2 - protolude-0.2
- servant-0.13 - servant-0.13
- servant-auth-0.3.0.1 - servant-auth-0.3.0.1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment