Metrics.hs 3.58 KB
Newer Older
1
{-|
2
Module      : Gargantext.Core.Text.Metrics
3 4 5 6
Description : All parsers of Gargantext in one file.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
7 8 9
Stability   : experimental
Portability : POSIX

10
Mainly reexport functions in @Data.Text.Metrics@
11

12 13
-}

14
{-# LANGUAGE TemplateHaskell   #-}
15

16
module Gargantext.Core.Text.Metrics
17
  where
18

19
--import Data.Array.Accelerate ((:.)(..), Z(..))
20
--import Math.KMeans (kmeans, euclidSq, elements)
21
import Control.Lens (makeLenses)
22
import Data.Map (Map)
23
import Data.Monoid (Monoid, mempty)
24 25
import Data.HashMap.Strict (HashMap)
import Data.Semigroup (Semigroup)
26
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
27
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
28 29
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
30
import qualified Data.Array.Accelerate as DAA
31 32
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Map  as Map
Nicolas Pouillard's avatar
Nicolas Pouillard committed
33
import qualified Data.Vector as V
34
import qualified Data.Vector.Storable as Vec
35 36
import qualified Data.HashMap.Strict as HashMap

37

38
type MapListSize = Int
39
type InclusionSize = Int
40

41 42
scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map . Map.fromList . HashMap.toList
43 44 45
  where
    scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
    scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
46

Nicolas Pouillard's avatar
Nicolas Pouillard committed
47 48
    map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
    map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
49 50 51 52

-- TODO change type with (x,y)
data Scored ts = Scored
  { _scored_terms  :: !ts
53 54
  , _scored_genInc :: !GenericityInclusion
  , _scored_speExc :: !SpecificityExclusion
55
  } deriving (Show, Eq, Ord)
56

57 58 59 60 61 62 63 64 65
instance Monoid a => Monoid (Scored a) where
  mempty = Scored mempty mempty mempty

instance Semigroup a => Semigroup (Scored a) where
  (<>) (Scored a  b  c )
       (Scored _a' b' c')
      = Scored (a {-<> a'-})
               (b <> b')
               (c <> c')
66

67 68
localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
69 70
                                         (Map.toList fi)
                                          scores
71
  where
72
    (ti, fi) = createIndices m
73
    (is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
74 75 76 77
    scores   = DAA.toList
             $ DAA.run
             $ DAA.zip (DAA.use is) (DAA.use ss)

78
-- TODO Code to be removed below
79 80 81
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
scored' :: Ord t => Map (t,t) Int -> [Scored t]
82
scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
83
  where
84
    (ti, fi) = createIndices m
85
    (is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
86 87 88 89 90
    scores   = DAA.toList
             $ DAA.run
             $ DAA.zip (DAA.use is) (DAA.use ss)


91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal ss = map (\(Scored t s1 s2)
                     -> Scored t ((s1 - s1min) / s1max)
                                 ((s2 - s2min) / s2max)) ss
  where
    ss1 = map _scored_genInc ss
    ss2 = map _scored_speExc ss

    s1min = minimum ss1
    s1max = maximum ss1

    s2min = minimum ss2
    s2max = maximum ss2



normalizeLocal :: Scored a -> Scored a
normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
  where
    log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)



114 115
-- | Type Instances
makeLenses 'Scored