Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
c34120e3
Commit
c34120e3
authored
Jun 01, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] fix cooc behavior.
parent
09cf2917
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
132 additions
and
31 deletions
+132
-31
Pipeline.hs
src/Gargantext/Pipeline.hs
+39
-9
Prelude.hs
src/Gargantext/Prelude.hs
+0
-2
Metrics.hs
src/Gargantext/Text/Metrics.hs
+61
-4
Occurrences.hs
src/Gargantext/Text/Metrics/Occurrences.hs
+13
-11
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+3
-3
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+16
-2
No files found.
src/Gargantext/Pipeline.hs
View file @
c34120e3
...
...
@@ -16,19 +16,44 @@ module Gargantext.Pipeline
where
import
Data.Text.IO
(
readFile
)
import
Control.Arrow
((
***
))
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Map.Strict
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.List
as
L
import
Data.Tuple.Extra
(
both
)
----------------------------------------------
import
Gargantext.Core
(
Lang
(
FR
))
import
Gargantext.Prelude
import
Gargantext.Viz.Graph.Index
(
score
,
createIndices
,
toIndex
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional
)
import
Gargantext.Viz.Graph.Index
(
score
,
createIndices
,
toIndex
,
fromIndex
,
cooc2mat
,
mat2map
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
incExcSpeGen
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Occurrences
(
cooc
,
removeApax
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Data.Graph.Clustering.Louvain.CplusPlus
(
cLouvain
)
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
(
incExc'
,
speGen'
)
=
both
(
map
fst
.
L
.
sortOn
snd
.
M
.
toList
.
mat2map
)
(
incExcSpeGen
m
)
n
=
nIe
+
nSg
nIe
=
30
nSg
=
70
pipeline
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
...
...
@@ -39,12 +64,17 @@ pipeline path = do
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
let
(
ti
,
fi
)
=
createIndices
myCooc
pure
ti
-- Cooc -> Matrix
let
theScores
=
M
.
take
350
$
M
.
filter
(
>
0
)
$
score
conditional
myCooc
let
(
ti
,
_
)
=
createIndices
theScores
--
---- -- Matrix -> Clustering -> Graph -> JSON
---- pure $ bestpartition False $ map2graph $ toIndex ti theScores
partitions
<-
cLouvain
$
toIndex
ti
theScores
pure
partitions
-- -- filter by spec/gen (dynmaic programming)
-- let theScores = M.filter (>0) $ score conditional myCoocFiltered
----
------ -- Matrix -> Clustering
------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- partitions <- cLouvain theScores
-- pure partitions
---- | Building : -> Graph -> JSON
src/Gargantext/Prelude.hs
View file @
c34120e3
...
...
@@ -237,5 +237,3 @@ unMaybe = map fromJust . L.filter isJust
-- maximumWith
maximumWith
f
=
L
.
maximumBy
(
\
x
y
->
compare
(
f
x
)
(
f
y
))
src/Gargantext/Text/Metrics.hs
View file @
c34120e3
...
...
@@ -14,12 +14,69 @@ Mainly reexport functions in @Data.Text.Metrics@
module
Gargantext.Text.Metrics
where
--import Data.Text (Text)
import
Data.Text
(
Text
,
pack
)
import
Data.List
(
concat
)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
--
--import Gargantext.Prelude
--
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.Occurrences
(
occurrences
,
cooc
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
),
terms
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
metrics_text
::
Text
metrics_text
=
"A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table."
-- | Sentences
metrics_sentences
::
[
Text
]
metrics_sentences
=
[
"A table is an object."
,
"A glas is an object."
,
"The glas is on the table."
,
"The spoon is an object."
,
"The spoon is on the table."
]
metrics_sentences_Test
=
splitBy
(
Sentences
0
)
metrics_text
==
metrics_sentences
-- | Terms reordered to visually check occurrences
metrics_terms
::
[[[
Text
]]]
metrics_terms
=
[[[
"table"
],[
"object"
]
]
,[
[
"object"
],[
"glas"
]
]
,[[
"table"
],
[
"glas"
]
]
,[
[
"object"
],
[
"spoon"
]]
,[[
"table"
],
[
"spoon"
]]
]
--metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
-- | Occurrences
{-
fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["object"],fromList [(["object"], 3 )])
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ
=
occurrences
<$>
concat
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
{-
-- fromList [((["glas"],["object"]),6)
,((["glas"],["spoon"]),4)
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc
=
cooc
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_cooc'
=
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
"The table object. The table object."
)
src/Gargantext/Text/Metrics/Occurrences.hs
View file @
c34120e3
...
...
@@ -82,9 +82,9 @@ removeApax = DMS.filter (> 1)
cooc
::
[[
Terms
]]
->
Map
(
Label
,
Label
)
Int
cooc
tss
=
mapKeys
(
delta
$
labelPolicy
terms_occs
)
$
cooc
'
(
map
(
Set
.
fromList
.
map
_terms_stem
)
tss
)
mapKeys
(
delta
$
labelPolicy
terms_occs
)
$
cooc
On
_terms_stem
tss
where
terms_occs
=
occurrences
(
List
.
concat
tss
)
terms_occs
=
occurrences
On
_terms_stem
(
List
.
concat
tss
)
delta
f
=
f
***
f
...
...
@@ -93,24 +93,26 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
Just
label
->
label
Nothing
->
panic
$
"Label of Grouped not found: "
<>
(
pack
$
show
g
)
cooc'
::
Ord
b
=>
[
Set
b
]
->
Map
(
b
,
b
)
Coocs
cooc'
tss
=
foldl'
(
\
m
(
xy
,
c
)
->
insertWith
((
+
))
xy
c
m
)
empty
xs
coocOn
::
Ord
b
=>
(
a
->
b
)
->
[[
a
]]
->
Map
(
b
,
b
)
Coocs
coocOn
f
as
=
foldl'
(
\
a
b
->
DMS
.
unionWith
(
+
)
a
b
)
empty
$
map
(
coocOn'
f
)
as
coocOn'
::
Ord
b
=>
(
a
->
b
)
->
[
a
]
->
Map
(
b
,
b
)
Coocs
coocOn'
f
ts
=
foldl'
(
\
m
(
xy
,
c
)
->
insertWith
((
+
))
xy
c
m
)
empty
xs
where
ts'
=
List
.
nub
$
map
f
ts
xs
=
[
((
x
,
y
),
1
)
|
xs
<-
tss
,
ys
<-
tss
,
x
<-
Set
.
toList
xs
,
y
<-
Set
.
toList
ys
|
x
<-
ts'
,
y
<-
ts'
,
x
<
y
]
-- | Compute the grouped occurrences (occ)
occurrences
::
[
Terms
]
->
Map
Grouped
(
Map
Terms
Int
)
occurrences
=
occurrences
'
_terms_stem
occurrences
=
occurrences
On
_terms_stem
occurrences
'
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrences
'
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
occurrences
On
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrences
On
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Occ
a
]
->
Occ
a
...
...
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
c34120e3
...
...
@@ -96,10 +96,10 @@ data Properties = Properties { _propertiesAnnotators :: Text
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
data
Sentences
=
Sentences
{
_sentences
::
[
Sentence
]}
data
PosSentences
=
Pos
Sentences
{
_sentences
::
[
Sentence
]}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
S
entences
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
P
os
Sentences
)
-- request =
...
...
@@ -134,7 +134,7 @@ corenlpRaw lang txt = do
pure
(
getResponseBody
response
)
corenlp
::
Lang
->
Text
->
IO
Sentences
corenlp
::
Lang
->
Text
->
IO
Pos
Sentences
corenlp
lang
txt
=
do
response
<-
corenlp'
lang
txt
pure
(
getResponseBody
response
)
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
c34120e3
...
...
@@ -102,8 +102,22 @@ conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
r
=
rank'
m
conditional'
::
Matrix
Double
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional'
m
=
(
run
$
ie
(
use
m
),
run
$
sg
(
use
m
))
{-
Metric Specificity and genericty: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
-}
incExcSpeGen
::
Matrix
Int
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
incExcSpeGen
m
=
(
run
$
ie
$
map
fromIntegral
$
use
m
,
run
$
sg
$
map
fromIntegral
$
use
m
)
where
ie
::
Matrix'
Double
->
Matrix'
Double
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment