Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
164
Issues
164
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
bf03165a
Commit
bf03165a
authored
Jun 08, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Scores] Documentation.
parent
457bf1f2
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
68 additions
and
60 deletions
+68
-60
Pipeline.hs
src/Gargantext/Pipeline.hs
+11
-23
Metrics.hs
src/Gargantext/Text/Metrics.hs
+42
-14
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+15
-23
No files found.
src/Gargantext/Pipeline.hs
View file @
bf03165a
...
@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
...
@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
Stability : experimental
Stability : experimental
Portability : POSIX
Portability : POSIX
-}
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
...
@@ -20,7 +19,6 @@ import Data.Text.IO (readFile)
...
@@ -20,7 +19,6 @@ import Data.Text.IO (readFile)
import
Control.Arrow
((
***
))
import
Control.Arrow
((
***
))
import
Data.Map.Strict
(
Map
)
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Map.Strict
as
M
import
qualified
Data.Map.Strict
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
L
import
qualified
Data.List
as
L
import
Data.Tuple.Extra
(
both
)
import
Data.Tuple.Extra
(
both
)
----------------------------------------------
----------------------------------------------
...
@@ -31,32 +29,22 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
...
@@ -31,32 +29,22 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics
(
incExcSpeGen
)
import
Gargantext.Text.Metrics
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Data.Graph.Clustering.Louvain.CplusPlus
(
cLouvain
)
import
Data.Graph.Clustering.Louvain.CplusPlus
(
cLouvain
)
-- ord relevance: top n plus inclus
-- échantillonnage de généricity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
{-
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
____ _ _
where
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
(
incExc'
,
speGen'
)
=
both
(
map
fst
.
L
.
sortOn
snd
.
M
.
toList
.
mat2map
)
(
conditional'
m
)
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
n
=
nIe
+
nSg
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
nIe
=
30
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
nSg
=
70
|___/
-}
pipeline
path
=
do
pipeline
path
=
do
-- Text <- IO Text <- FilePath
-- Text <- IO Text <- FilePath
...
@@ -69,7 +57,8 @@ pipeline path = do
...
@@ -69,7 +57,8 @@ pipeline path = do
let
myCooc
=
removeApax
$
cooc
myterms
let
myCooc
=
removeApax
$
cooc
myterms
--let (ti, fi) = createIndices myCooc
--let (ti, fi) = createIndices myCooc
pure
$
incExcSpeGen
myCooc
pure
True
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
-- Cooc -> Matrix
-- -- filter by spec/gen (dynamic programming)
-- -- filter by spec/gen (dynamic programming)
...
@@ -81,4 +70,3 @@ pipeline path = do
...
@@ -81,4 +70,3 @@ pipeline path = do
-- pure partitions
-- pure partitions
---- | Building : -> Graph -> JSON
---- | Building : -> Graph -> JSON
src/Gargantext/Text/Metrics.hs
View file @
bf03165a
...
@@ -8,6 +8,12 @@ Stability : experimental
...
@@ -8,6 +8,12 @@ Stability : experimental
Portability : POSIX
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
Mainly reexport functions in @Data.Text.Metrics@
TODO
noApax :: Ord a => Map a Occ -> Map a Occ
noApax m = M.filter (>1) m
-}
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
...
@@ -21,6 +27,7 @@ import Data.Map (Map)
...
@@ -21,6 +27,7 @@ import Data.Map (Map)
import
qualified
Data.List
as
L
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
import
qualified
Data.Map
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
both
)
import
Data.Tuple.Extra
(
both
)
--import GHC.Real (Ratio)
--import GHC.Real (Ratio)
...
@@ -39,8 +46,36 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
...
@@ -39,8 +46,36 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Index
import
Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
-- ord relevance: top n plus inclus
-- échantillonnage de généricity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
(
incExc'
,
speGen'
)
=
both
(
map
fst
.
L
.
sortOn
snd
.
M
.
toList
.
mat2map
)
(
conditional'
m
)
n
=
nIe
+
nSg
nIe
=
30
nSg
=
70
incExcSpeGen_sorted
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen_sorted
m
=
both
ordonne
(
incExcSpeGen
$
cooc2mat
ti
m
)
where
(
ti
,
fi
)
=
createIndices
m
ordonne
x
=
L
.
reverse
$
L
.
sortOn
snd
$
zip
(
map
snd
$
M
.
toList
fi
)
(
toList
x
)
metrics_text
::
Text
metrics_text
::
Text
...
@@ -54,7 +89,7 @@ metrics_sentences :: [Text]
...
@@ -54,7 +89,7 @@ metrics_sentences :: [Text]
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon."
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon."
,
"I can see the glass on the table."
,
"I can see the glass on the table."
,
"There was only a spoon on that table."
,
"There was only a spoon on that table."
,
"The glass just fall from the table, pouring wine e
lse
where."
,
"The glass just fall from the table, pouring wine e
very
where."
,
"I wish the glass did not contain wine."
,
"I wish the glass did not contain wine."
]
]
...
@@ -89,23 +124,16 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
...
@@ -89,23 +124,16 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
-}
-}
metrics_cooc
=
cooc
<$>
metrics_terms
metrics_cooc
=
cooc
<$>
metrics_terms
metrics_cooc_mat
=
do
metrics_cooc_mat
=
do
m
<-
metrics_cooc
m
<-
metrics_cooc
let
(
ti
,
_
)
=
createIndices
m
let
(
ti
,
_
)
=
createIndices
m
let
mat_cooc
=
cooc2mat
ti
m
let
mat_cooc
=
cooc2mat
ti
m
pure
(
ti
pure
(
ti
,
mat_cooc
,
mat_cooc
,
incExcSpeGen_proba
mat_cooc
,
incExcSpeGen_proba
mat_cooc
,
incExcSpeGen
'
mat_cooc
,
incExcSpeGen
mat_cooc
)
)
metrics_incExcSpeGen
=
incExcSpeGen_sorted
<$>
metrics_cooc
metrics_incExcSpeGen
=
incExcSpeGen
<$>
metrics_cooc
incExcSpeGen
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen
m
=
both
(
\
x
->
L
.
reverse
$
L
.
sortOn
snd
$
zip
(
map
snd
$
M
.
toList
fi
)
(
toList
x
)
)
(
incExcSpeGen'
$
cooc2mat
ti
m
)
where
(
ti
,
fi
)
=
createIndices
m
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
bf03165a
...
@@ -153,25 +153,17 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
...
@@ -153,25 +153,17 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- | Conditional Distance
{-
{-
Metric Specificity and genericity: select terms
Metric Specificity and genericity: select terms
N termes
let N termes
Ni : occ de i
Ni : occ de i
Nij : cooc i et j
Probability to get i given j : P(i|j)=Nij/Nj
Nij : cooc i et j
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
P(i|j)=Nij/Nj Probability to get i given j
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of i
Inclusion (i) = Gen(i)+Spec(i)
Inclusion (i) = Gen(i)+Spec(i)
Genericity score = Gen(i)- Spec(i)
Genericity score = Gen(i)- Spec(i)
...
@@ -193,17 +185,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
...
@@ -193,17 +185,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
--
--
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
specificityGenericity
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
--
TODO find a better term
--
| Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
--
TODO find a better term
--
| Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
...
@@ -211,25 +205,24 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
...
@@ -211,25 +205,24 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
---- | P(i|j) = N(ij) / N(jj)
p_ij
::
(
Elt
e
,
P
.
Fractional
(
Exp
e
))
=>
Acc
(
SymetricMatrix
e
)
->
Acc
(
Matrix
e
)
p_ij
::
(
Elt
e
,
P
.
Fractional
(
Exp
e
))
=>
Acc
(
SymetricMatrix
e
)
->
Acc
(
Matrix
e
)
p_ij
m
=
zipWith
(
/
)
m
(
n_jj
m
)
p_ij
m
=
zipWith
(
/
)
m
(
n_jj
m
)
where
where
n_jj
::
Elt
e
=>
Acc
(
SymetricMatrix
e
)
->
Acc
(
Matrix
e
)
n_jj
::
Elt
e
=>
Acc
(
SymetricMatrix
e
)
->
Acc
(
Matrix
e
)
n_jj
m
=
backpermute
(
shape
m
)
n_jj
m
=
backpermute
(
shape
m
)
(
lift1
(
\
(
Z
:.
(
i
::
Exp
Int
)
:.
(
j
::
Exp
Int
))
(
lift1
(
\
(
Z
:.
(
_
::
Exp
Int
)
:.
(
j
::
Exp
Int
))
->
(
Z
:.
j
:.
j
)
->
(
Z
:.
j
:.
j
)
)
)
)
m
)
m
-- | P(j|i) = N
(ij) / N(ii)
-- | P(j|i) = N
ij /N(ii) Probability to get j given i
-- to test
-- to test
p_ji
::
(
Elt
e
,
P
.
Fractional
(
Exp
e
))
=>
Acc
(
Array
DIM2
e
)
->
Acc
(
Array
DIM2
e
)
p_ji
::
(
Elt
e
,
P
.
Fractional
(
Exp
e
))
=>
Acc
(
Array
DIM2
e
)
->
Acc
(
Array
DIM2
e
)
p_ji
=
transpose
.
p_ij
p_ji
=
transpose
.
p_ij
-- | step to check the result
-- | Step to check the result in visual/qualitative tests
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
m
=
run'
pro
m
incExcSpeGen_proba
m
=
run'
pro
m
where
where
...
@@ -237,7 +230,6 @@ incExcSpeGen_proba m = run' pro m
...
@@ -237,7 +230,6 @@ incExcSpeGen_proba m = run' pro m
pro
mat
=
p_ji
mat
pro
mat
=
p_ji
mat
{-
{-
-- | Hypothesis to test maybe later (or not)
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writings:
-- TODO ask accelerate for instances to ease such writings:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment