Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
c34120e3
Commit
c34120e3
authored
Jun 01, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] fix cooc behavior.
parent
09cf2917
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
132 additions
and
31 deletions
+132
-31
Pipeline.hs
src/Gargantext/Pipeline.hs
+39
-9
Prelude.hs
src/Gargantext/Prelude.hs
+0
-2
Metrics.hs
src/Gargantext/Text/Metrics.hs
+61
-4
Occurrences.hs
src/Gargantext/Text/Metrics/Occurrences.hs
+13
-11
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+3
-3
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+16
-2
No files found.
src/Gargantext/Pipeline.hs
View file @
c34120e3
...
...
@@ -16,19 +16,44 @@ module Gargantext.Pipeline
where
import
Data.Text.IO
(
readFile
)
import
Control.Arrow
((
***
))
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Map.Strict
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
L
import
Data.Tuple.Extra
(
both
)
----------------------------------------------
import
Gargantext.Core
(
Lang
(
FR
))
import
Gargantext.Prelude
import
Gargantext.Viz.Graph.Index
(
score
,
createIndices
,
toIndex
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional
)
import
Gargantext.Viz.Graph.Index
(
score
,
createIndices
,
toIndex
,
fromIndex
,
cooc2mat
,
mat2map
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
incExcSpeGen
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Occurrences
(
cooc
,
removeApax
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Data.Graph.Clustering.Louvain.CplusPlus
(
cLouvain
)
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
(
incExc'
,
speGen'
)
=
both
(
map
fst
.
L
.
sortOn
snd
.
M
.
toList
.
mat2map
)
(
incExcSpeGen
m
)
n
=
nIe
+
nSg
nIe
=
30
nSg
=
70
pipeline
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
...
...
@@ -39,12 +64,17 @@ pipeline path = do
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
let
(
ti
,
fi
)
=
createIndices
myCooc
pure
ti
-- Cooc -> Matrix
let
theScores
=
M
.
take
350
$
M
.
filter
(
>
0
)
$
score
conditional
myCooc
let
(
ti
,
_
)
=
createIndices
theScores
--
---- -- Matrix -> Clustering -> Graph -> JSON
---- pure $ bestpartition False $ map2graph $ toIndex ti theScores
partitions
<-
cLouvain
$
toIndex
ti
theScores
pure
partitions
-- -- filter by spec/gen (dynmaic programming)
-- let theScores = M.filter (>0) $ score conditional myCoocFiltered
----
------ -- Matrix -> Clustering
------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- partitions <- cLouvain theScores
-- pure partitions
---- | Building : -> Graph -> JSON
src/Gargantext/Prelude.hs
View file @
c34120e3
...
...
@@ -237,5 +237,3 @@ unMaybe = map fromJust . L.filter isJust
-- maximumWith
maximumWith
f
=
L
.
maximumBy
(
\
x
y
->
compare
(
f
x
)
(
f
y
))
src/Gargantext/Text/Metrics.hs
View file @
c34120e3
...
...
@@ -14,12 +14,69 @@ Mainly reexport functions in @Data.Text.Metrics@
module
Gargantext.Text.Metrics
where
--import Data.Text (Text)
import
Data.Text
(
Text
,
pack
)
import
Data.List
(
concat
)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
--
--import Gargantext.Prelude
--
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.Occurrences
(
occurrences
,
cooc
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
),
terms
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
metrics_text
::
Text
metrics_text
=
"A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table."
-- | Sentences
metrics_sentences
::
[
Text
]
metrics_sentences
=
[
"A table is an object."
,
"A glas is an object."
,
"The glas is on the table."
,
"The spoon is an object."
,
"The spoon is on the table."
]
metrics_sentences_Test
=
splitBy
(
Sentences
0
)
metrics_text
==
metrics_sentences
-- | Terms reordered to visually check occurrences
metrics_terms
::
[[[
Text
]]]
metrics_terms
=
[[[
"table"
],[
"object"
]
]
,[
[
"object"
],[
"glas"
]
]
,[[
"table"
],
[
"glas"
]
]
,[
[
"object"
],
[
"spoon"
]]
,[[
"table"
],
[
"spoon"
]]
]
--metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
-- | Occurrences
{-
fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["object"],fromList [(["object"], 3 )])
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ
=
occurrences
<$>
concat
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
{-
-- fromList [((["glas"],["object"]),6)
,((["glas"],["spoon"]),4)
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc
=
cooc
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_cooc'
=
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
"The table object. The table object."
)
src/Gargantext/Text/Metrics/Occurrences.hs
View file @
c34120e3
...
...
@@ -82,9 +82,9 @@ removeApax = DMS.filter (> 1)
cooc
::
[[
Terms
]]
->
Map
(
Label
,
Label
)
Int
cooc
tss
=
mapKeys
(
delta
$
labelPolicy
terms_occs
)
$
cooc
'
(
map
(
Set
.
fromList
.
map
_terms_stem
)
tss
)
mapKeys
(
delta
$
labelPolicy
terms_occs
)
$
cooc
On
_terms_stem
tss
where
terms_occs
=
occurrences
(
List
.
concat
tss
)
terms_occs
=
occurrences
On
_terms_stem
(
List
.
concat
tss
)
delta
f
=
f
***
f
...
...
@@ -93,24 +93,26 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
Just
label
->
label
Nothing
->
panic
$
"Label of Grouped not found: "
<>
(
pack
$
show
g
)
cooc'
::
Ord
b
=>
[
Set
b
]
->
Map
(
b
,
b
)
Coocs
cooc'
tss
=
foldl'
(
\
m
(
xy
,
c
)
->
insertWith
((
+
))
xy
c
m
)
empty
xs
coocOn
::
Ord
b
=>
(
a
->
b
)
->
[[
a
]]
->
Map
(
b
,
b
)
Coocs
coocOn
f
as
=
foldl'
(
\
a
b
->
DMS
.
unionWith
(
+
)
a
b
)
empty
$
map
(
coocOn'
f
)
as
coocOn'
::
Ord
b
=>
(
a
->
b
)
->
[
a
]
->
Map
(
b
,
b
)
Coocs
coocOn'
f
ts
=
foldl'
(
\
m
(
xy
,
c
)
->
insertWith
((
+
))
xy
c
m
)
empty
xs
where
ts'
=
List
.
nub
$
map
f
ts
xs
=
[
((
x
,
y
),
1
)
|
xs
<-
tss
,
ys
<-
tss
,
x
<-
Set
.
toList
xs
,
y
<-
Set
.
toList
ys
|
x
<-
ts'
,
y
<-
ts'
,
x
<
y
]
-- | Compute the grouped occurrences (occ)
occurrences
::
[
Terms
]
->
Map
Grouped
(
Map
Terms
Int
)
occurrences
=
occurrences
'
_terms_stem
occurrences
=
occurrences
On
_terms_stem
occurrences
'
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrences
'
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
occurrences
On
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrences
On
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Occ
a
]
->
Occ
a
...
...
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
c34120e3
...
...
@@ -96,10 +96,10 @@ data Properties = Properties { _propertiesAnnotators :: Text
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
data
Sentences
=
Sentences
{
_sentences
::
[
Sentence
]}
data
PosSentences
=
Pos
Sentences
{
_sentences
::
[
Sentence
]}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
S
entences
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
P
os
Sentences
)
-- request =
...
...
@@ -134,7 +134,7 @@ corenlpRaw lang txt = do
pure
(
getResponseBody
response
)
corenlp
::
Lang
->
Text
->
IO
Sentences
corenlp
::
Lang
->
Text
->
IO
Pos
Sentences
corenlp
lang
txt
=
do
response
<-
corenlp'
lang
txt
pure
(
getResponseBody
response
)
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
c34120e3
...
...
@@ -102,8 +102,22 @@ conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
r
=
rank'
m
conditional'
::
Matrix
Double
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional'
m
=
(
run
$
ie
(
use
m
),
run
$
sg
(
use
m
))
{-
Metric Specificity and genericty: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
-}
incExcSpeGen
::
Matrix
Int
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
incExcSpeGen
m
=
(
run
$
ie
$
map
fromIntegral
$
use
m
,
run
$
sg
$
map
fromIntegral
$
use
m
)
where
ie
::
Matrix'
Double
->
Matrix'
Double
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment