Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
bf03165a
Commit
bf03165a
authored
Jun 08, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Scores] Documentation.
parent
457bf1f2
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
68 additions
and
60 deletions
+68
-60
Pipeline.hs
src/Gargantext/Pipeline.hs
+11
-23
Metrics.hs
src/Gargantext/Text/Metrics.hs
+42
-14
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+15
-23
No files found.
src/Gargantext/Pipeline.hs
View file @
bf03165a
...
...
@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
...
...
@@ -20,7 +19,6 @@ import Data.Text.IO (readFile)
import
Control.Arrow
((
***
))
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Map.Strict
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
L
import
Data.Tuple.Extra
(
both
)
----------------------------------------------
...
...
@@ -31,32 +29,22 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics
(
incExcSpeGen
)
import
Gargantext.Text.Metrics
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Data.Graph.Clustering.Louvain.CplusPlus
(
cLouvain
)
-- ord relevance: top n most included
-- sampling of genericity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
-- | Pick a bounded set of index pairs out of a matrix.
--
-- @conditional' m@ yields a pair of matrices (presumably the
-- inclusion/exclusion and specificity/genericity scores, judging by the
-- original local names -- confirm in "Gargantext.Viz.Graph.Distances.Matrice").
-- Each one is turned into a map with 'mat2map', its entries are sorted by
-- ascending value, and only the keys are kept.  The first 30 keys of the
-- first ranking and the first 70 keys of the second are merged into a set,
-- so duplicates collapse and the result comes back in ascending key order
-- with at most 100 elements.
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList (S.take budget (S.fromList candidates))
  where
    -- Keys of a matrix, ordered by ascending cell value.
    rankAscending mat = map fst (L.sortOn snd (M.toList (mat2map mat)))

    (byIncExc, bySpeGen) = both rankAscending (conditional' m)

    candidates = L.take quotaIncExc byIncExc <> L.take quotaSpeGen bySpeGen

    quotaIncExc = 30
    quotaSpeGen = 70
    -- Upper bound on the number of selected pairs.
    budget = quotaIncExc + quotaSpeGen
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
|___/
-}
pipeline
path
=
do
-- Text <- IO Text <- FilePath
...
...
@@ -69,7 +57,8 @@ pipeline path = do
let
myCooc
=
removeApax
$
cooc
myterms
--let (ti, fi) = createIndices myCooc
pure
$
incExcSpeGen
myCooc
pure
True
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
-- -- filter by spec/gen (dynamic programming)
...
...
@@ -81,4 +70,3 @@ pipeline path = do
-- pure partitions
---- | Building : -> Graph -> JSON
src/Gargantext/Text/Metrics.hs
View file @
bf03165a
...
...
@@ -8,6 +8,12 @@ Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
TODO
noApax :: Ord a => Map a Occ -> Map a Occ
noApax m = M.filter (>1) m
-}
{-# LANGUAGE NoImplicitPrelude #-}
...
...
@@ -21,6 +27,7 @@ import Data.Map (Map)
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
both
)
--import GHC.Real (Ratio)
...
...
@@ -39,8 +46,36 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
-- ord relevance: top n most included
-- sampling of genericity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
-- | Pick a bounded set of index pairs out of a matrix.
--
-- @conditional' m@ yields a pair of matrices (presumably the
-- inclusion/exclusion and specificity/genericity scores, judging by the
-- original local names -- confirm in "Gargantext.Viz.Graph.Distances.Matrice").
-- Each one is turned into a map with 'mat2map', its entries are sorted by
-- ascending value, and only the keys are kept.  The first 30 keys of the
-- first ranking and the first 70 keys of the second are merged into a set,
-- so duplicates collapse and the result comes back in ascending key order
-- with at most 100 elements.
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList (S.take budget (S.fromList candidates))
  where
    -- Keys of a matrix, ordered by ascending cell value.
    rankAscending mat = map fst (L.sortOn snd (M.toList (mat2map mat)))

    (byIncExc, bySpeGen) = both rankAscending (conditional' m)

    candidates = L.take quotaIncExc byIncExc <> L.take quotaSpeGen bySpeGen

    quotaIncExc = 30
    quotaSpeGen = 70
    -- Upper bound on the number of selected pairs.
    budget = quotaIncExc + quotaSpeGen
-- | Score the terms of a co-occurrence map and rank them.
--
-- The map is indexed with 'createIndices', converted to a matrix with
-- 'cooc2mat' and handed to 'incExcSpeGen', which returns a pair of score
-- collections.  Each collection is zipped with the values of the second
-- index map (taken in ascending key order; presumably index -> term, confirm
-- in 'createIndices') and ordered by reversing an ascending sort on the
-- score, i.e. highest score first.
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both rankDescending scores
  where
    (toIdx, fromIdx) = createIndices m

    scores = incExcSpeGen (cooc2mat toIdx m)

    -- Labels in the key order of the second index map.
    labels = map snd (M.toList fromIdx)

    rankDescending vec = L.reverse (L.sortOn snd (zip labels (toList vec)))
metrics_text
::
Text
...
...
@@ -54,7 +89,7 @@ metrics_sentences :: [Text]
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon."
,
"I can see the glass on the table."
,
"There was only a spoon on that table."
,
"The glass just fall from the table, pouring wine e
lse
where."
,
"The glass just fall from the table, pouring wine e
very
where."
,
"I wish the glass did not contain wine."
]
...
...
@@ -89,6 +124,7 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
-}
-- | Co-occurrences of the sample terms: 'cooc' mapped over the result of
-- 'metrics_terms'.
metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat
=
do
m
<-
metrics_cooc
let
(
ti
,
_
)
=
createIndices
m
...
...
@@ -96,16 +132,8 @@ metrics_cooc_mat = do
pure
(
ti
,
mat_cooc
,
incExcSpeGen_proba
mat_cooc
,
incExcSpeGen
'
mat_cooc
,
incExcSpeGen
mat_cooc
)
-- | 'incExcSpeGen' mapped over the sample co-occurrences ('metrics_cooc').
metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
-- | Score the terms of a co-occurrence map and rank them.
--
-- The map is indexed with 'createIndices', converted to a matrix with
-- 'cooc2mat' and scored by @incExcSpeGen'@, which returns a pair of score
-- collections.  Each collection is zipped with the values of the second
-- index map (taken in ascending key order; presumably index -> term, confirm
-- in 'createIndices') and ordered by reversing an ascending sort on the
-- score, i.e. highest score first.
incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen m = both rankDescending scores
  where
    (toIdx, fromIdx) = createIndices m

    scores = incExcSpeGen' (cooc2mat toIdx m)

    -- Labels in the key order of the second index map.
    labels = map snd (M.toList fromIdx)

    rankDescending vec = L.reverse (L.sortOn snd (zip labels (toList vec)))
-- | 'incExcSpeGen_sorted' mapped over the sample co-occurrences
-- ('metrics_cooc').
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
bf03165a
...
...
@@ -153,25 +153,17 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- | Conditional Distance
{-
Metric Specificity and genericity: select terms
N terms
let N terms
Ni : occ de i
Nij : cooc i et j
P(i|j)=Nij/Nj Probability to get i given j
Probability to get i given j : P(i|j)=Nij/Nj
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
Inclusion (i) = Gen(i)+Spec(i)
Genericity score = Gen(i)- Spec(i)
...
...
@@ -193,17 +185,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
--
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
--
TODO find a better term
--
| Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
--
TODO find a better term
--
| Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
...
...
@@ -211,25 +205,24 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
-- | P(i|j) = Nij / N(jj): probability to get i given j.
--
-- Divides the input matrix element-wise by a matrix of the same shape in
-- which every cell @(i, j)@ holds the diagonal entry @(j, j)@ of the input.
p_ij :: (Elt e, P.Fractional (Exp e))
     => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m)
  where
    -- Same shape as the input; cell (i, j) reads the input at (j, j).
    -- The row index is not needed, hence the wildcard.
    n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
    n_jj mat = backpermute (shape mat)
                           (lift1 ( \(Z :. (_ :: Exp Int) :. (j :: Exp Int))
                                     -> (Z :. j :. j)
                                  )
                           ) mat
-- | P(j|i) = Nij / N(ii): probability to get j given i.
--
-- Defined as the transpose of 'p_ij'.
-- to test
p_ji :: (Elt e, P.Fractional (Exp e))
     => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ji mat = transpose (p_ij mat)
-- | step to check the result
-- | Step to check the result in visual/qualitative tests
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
m
=
run'
pro
m
where
...
...
@@ -237,7 +230,6 @@ incExcSpeGen_proba m = run' pro m
pro
mat
=
p_ji
mat
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writings:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment