Commit fe2a3c30 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[STATS] refactor PCA dim reduction.

parent 9ba0327c
......@@ -31,7 +31,7 @@ import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.SVM as SVM
import qualified Data.SVM as SVM
import qualified Data.Vector as Vec
------------------------------------------------------------------------
......
......@@ -29,29 +29,26 @@ import GHC.Real (round)
import Gargantext.Prelude
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.List as List
import qualified Data.Map as Map
import Numeric.Statistics.PCA (pcaReduceN)
import qualified Data.Vector.Storable as Vec
import Data.Array.IArray (Array, listArray, elems)
type GraphListSize = Int
type InclusionSize = Int
toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored = map2scored
. (reduceTo (Dimension 2))
. (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (reduceTo (Dimension 2)) . scored2map
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
......@@ -66,20 +63,6 @@ data Scored ts = Scored
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
data Dimension = Dimension Int
reduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
where
ss'' :: Array Int (Vec.Vector Double)
ss'' = listArray (1, List.length ss') ss'
(txts,ss') = List.unzip $ Map.toList ss
localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
......@@ -92,8 +75,6 @@ localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [i
$ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be remove below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
......@@ -107,10 +88,6 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
$ DAA.zip (DAA.use is) (DAA.use ss)
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms
. linearTakes listSize incSize _scored_speGen
......@@ -134,4 +111,3 @@ linearTakes gls incSize speGen incExc = take gls
. splitEvery incSize
. sortOn speGen
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment