1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
{-|
Module : Gargantext.Core.Text.Metrics
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Metrics
where
import Data.Array.Accelerate qualified as DAA
import Data.Array.Accelerate.Interpreter qualified as DAA
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Vector qualified as V
import Data.Vector.Storable qualified as Vec
import Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
type MapListSize = Int
type InclusionSize = Int
scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map . Map.fromList . HashMap.toList
where
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
-- TODO change type with (x,y)
data Scored ts = Scored
{ _scored_terms :: !ts
, _scored_genInc :: !GenericityInclusion
, _scored_speExc :: !SpecificityExclusion
} deriving (Show, Eq, Ord)
instance Monoid a => Monoid (Scored a) where
mempty = Scored mempty mempty mempty
instance Semigroup a => Semigroup (Scored a) where
(<>) (Scored a b c )
(Scored _a' b' c')
= Scored (a {-<> a'-})
(b <> b')
(c <> c')
localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be removed below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
scored' :: Ord t => Map (t,t) Int -> [Scored t]
scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal ss = map (\(Scored t s1 s2)
-> Scored t ((s1 - s1min) / s1max)
((s2 - s2min) / s2max)) ss
where
ss1 = map _scored_genInc ss
ss2 = map _scored_speExc ss
s1min = minimum ss1
s1max = maximum ss1
s2min = minimum ss2
s2max = maximum ss2
normalizeLocal :: Scored a -> Scored a
normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
where
log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
-- | Type Instances
makeLenses 'Scored