Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d710b723
Commit
d710b723
authored
Jun 08, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'pipeline'
parents
7e930b57
b5124f1e
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
422 additions
and
126 deletions
+422
-126
.gitignore
.gitignore
+5
-0
package.yaml
package.yaml
+1
-1
Types.hs
src/Gargantext/Core/Types.hs
+0
-5
Pipeline.hs
src/Gargantext/Pipeline.hs
+41
-12
Prelude.hs
src/Gargantext/Prelude.hs
+0
-2
Text.hs
src/Gargantext/Text.hs
+1
-1
Metrics.hs
src/Gargantext/Text/Metrics.hs
+121
-7
Count.hs
src/Gargantext/Text/Metrics/Count.hs
+26
-18
FrequentItemSet.hs
src/Gargantext/Text/Metrics/FrequentItemSet.hs
+46
-19
Terms.hs
src/Gargantext/Text/Terms.hs
+10
-3
Multi.hs
src/Gargantext/Text/Terms/Multi.hs
+7
-3
En.hs
src/Gargantext/Text/Terms/Multi/Lang/En.hs
+2
-2
Fr.hs
src/Gargantext/Text/Terms/Multi/Lang/Fr.hs
+2
-2
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+3
-3
Graph.hs
src/Gargantext/Viz/Graph.hs
+0
-5
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+136
-33
Index.hs
src/Gargantext/Viz/Graph/Index.hs
+20
-9
stack.yaml
stack.yaml
+1
-1
No files found.
.gitignore
View file @
d710b723
...
...
@@ -2,3 +2,8 @@
*.swp
*.cabal
*purescript-gargantext
doc
bin
clustering-louvain
profiling
servant-job
package.yaml
View file @
d710b723
...
...
@@ -35,7 +35,6 @@ library:
dependencies
:
-
QuickCheck
-
accelerate
-
accelerate-io
-
aeson
-
aeson-lens
-
aeson-pretty
...
...
@@ -46,6 +45,7 @@ library:
-
bytestring
-
case-insensitive
-
cassava
-
clustering-louvain
-
conduit
-
conduit-extra
-
containers
...
...
src/Gargantext/Core/Types.hs
View file @
d710b723
...
...
@@ -46,15 +46,10 @@ data Terms = Terms { _terms_label :: Label
instance
Show
Terms
where
show
(
Terms
l
s
)
=
show
l
-- class Inclusion where include
--instance Eq Terms where
-- (==) (Terms _ s1) (Terms _ s2) = s1 `S.isSubsetOf` s2
-- || s2 `S.isSubsetOf` s1
instance
Eq
Terms
where
(
==
)
(
Terms
_
s1
)
(
Terms
_
s2
)
=
s1
==
s2
------------------------------------------------------------------------
data
Tag
=
POS
|
NER
deriving
(
Show
,
Eq
)
...
...
src/Gargantext/Pipeline.hs
View file @
d710b723
...
...
@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
...
...
@@ -17,27 +16,57 @@ module Gargantext.Pipeline
import
Data.Text.IO
(
readFile
)
import
Control.Arrow
((
***
))
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Map.Strict
as
M
import
qualified
Data.List
as
L
import
Data.Tuple.Extra
(
both
)
----------------------------------------------
----------------------------------------------
import
Gargantext.Core
import
Gargantext.Core
(
Lang
(
FR
))
import
Gargantext.Prelude
import
Gargantext.Viz.Graph.Index
(
score
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
distributional
)
import
Gargantext.Text.Metrics.Occurrences
import
Gargantext.Text.Terms
import
Gargantext.Text.Context
import
Gargantext.Viz.Graph.Index
(
score
,
createIndices
,
toIndex
,
fromIndex
,
cooc2mat
,
mat2map
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Data.Graph.Clustering.Louvain.CplusPlus
(
cLouvain
)
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
|___/
-}
pipeline
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
3
)
text
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
5
)
text
myterms
<-
extractTerms
Multi
FR
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
--let (ti, fi) = createIndices myCooc
pure
True
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
pure
$
score
distributional
myCooc
-- Matrix -> Clustering -> Graph -> JSON
-- -- filter by spec/gen (dynmaic programming)
-- let theScores = M.filter (>0) $ score conditional myCoocFiltered
----
------ -- Matrix -> Clustering
------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- partitions <- cLouvain theScores
-- pure partitions
---- | Building : -> Graph -> JSON
src/Gargantext/Prelude.hs
View file @
d710b723
...
...
@@ -237,5 +237,3 @@ unMaybe = map fromJust . L.filter isJust
-- maximumWith
maximumWith
f
=
L
.
maximumBy
(
\
x
y
->
compare
(
f
x
)
(
f
y
))
src/Gargantext/Text.hs
View file @
d710b723
...
...
@@ -27,7 +27,7 @@ import NLP.FullStop (segment)
-----------------------------------------------------------------
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Text.Metrics.
Occurrences
(
Occ
,
occurrences
,
cooc
)
import
Gargantext.Text.Metrics.
Count
(
Occ
,
occurrences
,
cooc
)
import
Gargantext.Prelude
hiding
(
filter
)
-----------------------------------------------------------------
...
...
src/Gargantext/Text/Metrics.hs
View file @
d710b723
...
...
@@ -8,18 +8,132 @@ Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
TODO
noApax :: Ord a => Map a Occ -> Map a Occ
noApax m = M.filter (>1) m
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Metrics
where
module
Gargantext.Text.Metrics
where
--import Data.Text (Text)
import
Data.Text
(
Text
,
pack
)
import
Data.Map
(
Map
)
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
import
qualified
Data.Set
as
S
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
both
)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
--
--import Gargantext.Prelude
--
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
import
Data.Array.Accelerate
(
toList
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.Count
(
occurrences
,
cooc
)
import
Gargantext.Text.Terms
(
TermType
(
MonoMulti
),
terms
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Index
-- ord relevance: top n plus inclus
-- échantillonnage de généricity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
(
incExc'
,
speGen'
)
=
both
(
map
fst
.
L
.
sortOn
snd
.
M
.
toList
.
mat2map
)
(
conditional'
m
)
n
=
nIe
+
nSg
nIe
=
30
nSg
=
70
incExcSpeGen_sorted
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen_sorted
m
=
both
ordonne
(
incExcSpeGen
$
cooc2mat
ti
m
)
where
(
ti
,
fi
)
=
createIndices
m
ordonne
x
=
L
.
reverse
$
L
.
sortOn
snd
$
zip
(
map
snd
$
M
.
toList
fi
)
(
toList
x
)
metrics_text
::
Text
metrics_text
=
T
.
intercalate
" "
metrics_sentences
metrics_sentences'
::
[
Text
]
metrics_sentences'
=
splitBy
(
Sentences
0
)
metrics_text
-- | Sentences
metrics_sentences
::
[
Text
]
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon."
,
"I can see the glass on the table."
,
"There was only a spoon on that table."
,
"The glass just fall from the table, pouring wine everywhere."
,
"I wish the glass did not contain wine."
]
metrics_sentences_Test
=
metrics_sentences
==
metrics_sentences'
-- | Terms reordered to visually check occurrences
-- >>>
{- [ [["table"],["glass"],["wine"],["spoon"]]
, [["glass"],["table"]]
, [["spoon"],["table"]]
, [["glass"],["table"],["wine"]]
, [["glass"],["wine"]]
]
-}
metrics_terms
::
IO
[[
Terms
]]
metrics_terms
=
mapM
(
terms
MonoMulti
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
-- | Occurrences
{-
fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["object"],fromList [(["object"], 3 )])
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ
=
occurrences
<$>
L
.
concat
<$>
metrics_terms
{-
-- fromList [((["glas"],["object"]),6)
,((["glas"],["spoon"]),4)
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc
=
cooc
<$>
metrics_terms
metrics_cooc_mat
=
do
m
<-
metrics_cooc
let
(
ti
,
_
)
=
createIndices
m
let
mat_cooc
=
cooc2mat
ti
m
pure
(
ti
,
mat_cooc
,
incExcSpeGen_proba
mat_cooc
,
incExcSpeGen
mat_cooc
)
metrics_incExcSpeGen
=
incExcSpeGen_sorted
<$>
metrics_cooc
src/Gargantext/Text/Metrics/
Occurrences
.hs
→
src/Gargantext/Text/Metrics/
Count
.hs
View file @
d710b723
{-|
Module : Gargantext.Text.Metrics.
Occurrences
Module : Gargantext.Text.Metrics.
Count
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -25,7 +25,7 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Metrics.
Occurrences
module
Gargantext.Text.Metrics.
Count
where
...
...
@@ -71,7 +71,6 @@ type Grouped = Stems
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
----
-}
type
Occs
=
Int
...
...
@@ -81,10 +80,16 @@ removeApax :: Map (Label, Label) Int -> Map (Label, Label) Int
removeApax
=
DMS
.
filter
(
>
1
)
cooc
::
[[
Terms
]]
->
Map
(
Label
,
Label
)
Int
cooc
tss
=
mapKeys
(
delta
$
labelPolicy
terms_occs
)
$
cooc'
(
map
(
Set
.
fromList
.
map
_terms_stem
)
tss
)
cooc
tss
=
coocOnWithLabel
_terms_stem
(
labelPolicy
terms_occs
)
tss
where
terms_occs
=
occurrencesOn
_terms_stem
(
List
.
concat
tss
)
coocOnWithLabel
::
(
Ord
label
,
Ord
b
)
=>
(
a
->
b
)
->
(
b
->
label
)
->
[[
a
]]
->
Map
(
label
,
label
)
Coocs
coocOnWithLabel
on
policy
tss
=
mapKeys
(
delta
policy
)
$
coocOn
on
tss
where
terms_occs
=
occurrences
(
List
.
concat
tss
)
delta
f
=
f
***
f
...
...
@@ -93,26 +98,29 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
Just
label
->
label
Nothing
->
panic
$
"Label of Grouped not found: "
<>
(
pack
$
show
g
)
cooc
'
::
Ord
b
=>
[
Set
b
]
->
Map
(
b
,
b
)
Coocs
cooc
'
tss
=
foldl'
(
\
m
(
xy
,
c
)
->
insertWith
((
+
))
xy
c
m
)
empty
x
s
cooc
On
::
Ord
b
=>
(
a
->
b
)
->
[[
a
]
]
->
Map
(
b
,
b
)
Coocs
cooc
On
f
as
=
foldl'
(
\
a
b
->
DMS
.
unionWith
(
+
)
a
b
)
empty
$
map
(
coocOn'
f
)
a
s
where
xs
=
[
((
x
,
y
),
1
)
|
xs
<-
tss
,
ys
<-
tss
,
x
<-
Set
.
toList
xs
,
y
<-
Set
.
toList
ys
,
x
<
y
]
coocOn'
::
Ord
b
=>
(
a
->
b
)
->
[
a
]
->
Map
(
b
,
b
)
Coocs
coocOn'
f
ts
=
foldl'
(
\
m
(
xy
,
c
)
->
insertWith
((
+
))
xy
c
m
)
empty
xs
where
ts'
=
List
.
nub
$
map
f
ts
xs
=
[
((
x
,
y
),
1
)
|
x
<-
ts'
,
y
<-
ts'
-- , x /= y
]
-- | Compute the grouped occurrences (occ)
occurrences
::
[
Terms
]
->
Map
Grouped
(
Map
Terms
Int
)
occurrences
=
occurrences
'
_terms_stem
occurrences
=
occurrences
On
_terms_stem
occurrences
'
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrences
'
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
occurrences
On
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrences
On
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Occ
a
]
->
Occ
a
sumOcc
xs
=
foldl'
(
unionWith
(
+
))
empty
xs
...
...
src/Gargantext/Text/Metrics/FrequentItemSet.hs
View file @
d710b723
...
...
@@ -14,42 +14,52 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.Text.Metrics.FrequentItemSet
(
Fis
,
Size
(
Fis
,
Size
(
..
)
,
occ_hlcm
,
cooc_hlcm
,
all
,
between
,
fisWithSize
,
fisWith
,
fisWithSizePoly
,
fisWithSizePoly2
,
module
HLCM
)
where
import
Data.List
(
tail
,
filter
)
import
Data.Either
import
Prelude
(
Functor
(
..
))
-- TODO
import
qualified
Data.Map.Strict
as
Map
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Set
as
Set
import
Data.Set
(
Set
)
import
qualified
Data.Vector
as
V
import
Data.Vector
(
Vector
)
import
Data.List
(
filter
,
concat
)
import
Data.Maybe
(
catMaybes
)
import
HLCM
import
Gargantext.Prelude
type
Size
=
Either
Int
(
Int
,
Int
)
--data Size = Point | Segment
data
Size
=
Point
Int
|
Segment
Int
Int
------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1
occ_hlcm
::
Frequency
->
[[
Item
]]
->
[
Fis
]
occ_hlcm
f
is
=
fisWithSize
(
Left
1
)
f
is
occ_hlcm
=
fisWithSize
(
Point
1
)
-- | Cooccurrence is Frequent Item Set of size 2
cooc_hlcm
::
Frequency
->
[[
Item
]]
->
[
Fis
]
cooc_hlcm
f
is
=
fisWithSize
(
Left
2
)
f
is
cooc_hlcm
=
fisWithSize
(
Point
2
)
all
::
Frequency
->
[[
Item
]]
->
[
Fis
]
all
f
is
=
fisWith
Nothing
f
is
all
=
fisWith
Nothing
------------------------------------------------------------------------
between
::
(
Int
,
Int
)
->
Frequency
->
[[
Item
]]
->
[
Fis
]
between
(
x
,
y
)
f
is
=
fisWithSize
(
Right
(
x
,
y
))
f
is
between
(
x
,
y
)
=
fisWithSize
(
Segment
x
y
)
--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m
f is = between (0,m) f is
--maximum m
= between (0,m)
------------------------------------------------------------------------
...
...
@@ -62,31 +72,48 @@ data Fis' a = Fis' { _fisCount :: Int
,
_fisItemSet
::
[
a
]
}
deriving
(
Show
)
instance
Functor
Fis'
where
fmap
f
(
Fis'
c
is
)
=
Fis'
c
(
fmap
f
is
)
-- | Sugar from items to FIS
items2fis
::
[
Item
]
->
Maybe
Fis
items2fis
is
=
case
head
is
of
Nothing
->
Nothing
Just
h
->
Just
(
Fis'
h
(
tail
is
))
items2fis
[]
=
Nothing
items2fis
(
i
:
is
)
=
Just
$
Fis'
i
is
------------------------------------------------------------------------
------------------------------------------------------------------------
fisWithSize
::
Size
->
Frequency
->
[[
Item
]]
->
[
Fis
]
fisWithSize
n
f
is
=
case
n
of
Left
n'
->
fisWith
(
Just
(
\
x
->
length
x
==
(
n'
+
1
)
))
f
is
Right
(
a
,
b
)
->
fisWith
(
Just
(
\
x
->
cond1
a
x
&&
cond2
b
x
))
f
is
Point
n'
->
fisWith
(
Just
(
\
x
->
length
x
==
(
n'
+
1
)
))
f
is
Segment
a
b
->
fisWith
(
Just
(
\
x
->
cond
a
(
length
x
)
b
))
f
is
where
cond1
a'
x
=
length
x
>=
a'
cond2
b'
x
=
length
x
<=
b'
cond
a'
x
b'
=
a'
<=
x
&&
x
<=
b'
fisWith
::
Maybe
([
Item
]
->
Bool
)
->
Frequency
->
[[
Item
]]
->
[
Fis
]
fisWith
s
f
is
=
unMaybe
$
map
items2fis
$
filter'
$
runLCMmatrix
is
f
fisWith
s
f
is
=
catMaybes
$
map
items2fis
$
filter'
$
runLCMmatrix
is
f
where
filter'
=
case
s
of
Nothing
->
identity
Just
fun
->
filter
fun
-- Here the sole purpose to take the keys as a Set is tell we do not want
-- duplicates.
fisWithSizePoly
::
Ord
a
=>
Size
->
Frequency
->
Set
a
->
[[
a
]]
->
[
Fis'
a
]
fisWithSizePoly
n
f
ks
=
map
(
fmap
fromItem
)
.
fisWithSize
n
f
.
map
(
map
toItem
)
where
ksv
=
V
.
fromList
$
Set
.
toList
ks
ksm
=
Map
.
fromList
.
flip
zip
[
0
..
]
$
V
.
toList
ksv
toItem
=
(
ksm
Map
.!
)
fromItem
=
(
ksv
V
.!
)
fisWithSizePoly2
::
Ord
a
=>
Size
->
Frequency
->
[[
a
]]
->
[
Fis'
a
]
fisWithSizePoly2
n
f
is
=
fisWithSizePoly
n
f
ks
is
where
ks
=
Set
.
fromList
$
concat
is
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Text/Terms.hs
View file @
d710b723
...
...
@@ -42,16 +42,23 @@ import Gargantext.Core.Types
import
Gargantext.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Text.Terms.Mono
(
monoterms'
)
data
TermType
=
Mono
|
Multi
data
TermType
=
Mono
|
Multi
|
MonoMulti
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user).
extractTerms
::
Traversable
t
=>
TermType
->
Lang
->
t
Text
->
IO
(
t
[
Terms
])
extractTerms
termType
lang
=
mapM
(
terms
termType
lang
)
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
-- Multi : multi terms
-- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms
::
TermType
->
Lang
->
Text
->
IO
[
Terms
]
terms
Mono
lang
txt
=
pure
$
monoterms'
lang
txt
terms
Multi
lang
txt
=
multiterms
lang
txt
terms
Mono
lang
txt
=
pure
$
monoterms'
lang
txt
terms
Multi
lang
txt
=
multiterms
lang
txt
terms
MonoMulti
lang
txt
=
terms
Multi
lang
txt
------------------------------------------------------------------------
src/Gargantext/Text/Terms/Multi.hs
View file @
d710b723
...
...
@@ -18,23 +18,27 @@ module Gargantext.Text.Terms.Multi (multiterms)
import
Data.Text
hiding
(
map
,
group
,
filter
,
concat
)
import
Data.List
(
concat
)
import
qualified
Data.Set
as
S
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Text.Terms.Multi.PosTagging
import
Gargantext.Text.Terms.Mono.Stem
(
stem
)
import
qualified
Gargantext.Text.Terms.Multi.Lang.En
as
En
import
qualified
Gargantext.Text.Terms.Multi.Lang.Fr
as
Fr
multiterms
::
Lang
->
Text
->
IO
[
Terms
]
multiterms
lang
txt
=
concat
<$>
map
(
map
tokenTag2terms
)
<$>
map
(
map
(
tokenTag2terms
lang
)
)
<$>
map
(
filter
(
\
t
->
_my_token_pos
t
==
Just
NP
))
<$>
tokenTags
lang
txt
tokenTag2terms
::
TokenTag
->
Terms
tokenTag2terms
(
TokenTag
w
t
_
_
)
=
Terms
w
t
tokenTag2terms
::
Lang
->
TokenTag
->
Terms
tokenTag2terms
lang
(
TokenTag
w
t
_
_
)
=
Terms
w
t'
where
t'
=
S
.
fromList
$
map
(
stem
lang
)
$
S
.
toList
t
tokenTags
::
Lang
->
Text
->
IO
[[
TokenTag
]]
tokenTags
lang
s
=
map
(
group
lang
)
<$>
tokenTags'
lang
s
...
...
src/Gargantext/Text/Terms/Multi/Lang/En.hs
View file @
d710b723
...
...
@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group
[]
=
[]
group
ntags
=
group2
NP
NP
$
group2
NP
VB
$
group2
NP
IN
--
$ group2 NP IN
$
group2
IN
DT
$
group2
VB
NP
--
$ group2 VB NP
$
group2
JJ
NP
$
group2
JJ
JJ
$
group2
JJ
CC
...
...
src/Gargantext/Text/Terms/Multi/Lang/Fr.hs
View file @
d710b723
...
...
@@ -27,8 +27,8 @@ group :: [TokenTag] -> [TokenTag]
group
[]
=
[]
group
ntags
=
group2
NP
NP
$
group2
NP
VB
$
group2
NP
IN
$
group2
IN
DT
--
$ group2 NP IN
--
$ group2 IN DT
$
group2
VB
NP
$
group2
JJ
NP
$
group2
NP
JJ
...
...
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
d710b723
...
...
@@ -96,10 +96,10 @@ data Properties = Properties { _propertiesAnnotators :: Text
$
(
deriveJSON
(
unPrefix
"_properties"
)
''
P
roperties
)
data
Sentences
=
Sentences
{
_sentences
::
[
Sentence
]}
data
PosSentences
=
Pos
Sentences
{
_sentences
::
[
Sentence
]}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
S
entences
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
P
os
Sentences
)
-- request =
...
...
@@ -134,7 +134,7 @@ corenlpRaw lang txt = do
pure
(
getResponseBody
response
)
corenlp
::
Lang
->
Text
->
IO
Sentences
corenlp
::
Lang
->
Text
->
IO
Pos
Sentences
corenlp
lang
txt
=
do
response
<-
corenlp'
lang
txt
pure
(
getResponseBody
response
)
...
...
src/Gargantext/Viz/Graph.hs
View file @
d710b723
...
...
@@ -60,8 +60,3 @@ $(deriveJSON (unPrefix "g_") ''Graph)
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
d710b723
...
...
@@ -28,10 +28,12 @@ Implementation use Accelerate library :
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.Viz.Graph.Distances.Matrice
where
...
...
@@ -46,7 +48,7 @@ import Data.Maybe (Maybe(Just))
import
qualified
Gargantext.Prelude
as
P
import
qualified
Data.Array.Accelerate.Array.Representation
as
Repr
import
Gargantext.Text.Metrics.
Occurrences
import
Gargantext.Text.Metrics.
Count
-----------------------------------------------------------------------
...
...
@@ -67,33 +69,48 @@ myMat n = matrix n [1..]
rank
::
(
Matrix
a
)
->
Int
rank
m
=
arrayRank
$
arrayShape
m
rank'
::
(
Matrix
a
)
->
Int
rank'
m
=
n
-----------------------------------------------------------------------
-- | Dimension of a square Matrix
-- How to force use with SquareMatrix ?
type
Dim
=
Int
dim
::
(
Matrix
a
)
->
Dim
dim
m
=
n
where
Z
:.
_
:.
n
=
arrayShape
m
-- == indexTail (arrayShape m)
-----------------------------------------------------------------------
-- | Conditional Distance
proba
::
Dim
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
proba
r
mat
=
zipWith
(
/
)
mat
(
mkSum
r
mat
)
type
Rank
=
Int
mkSum
::
Dim
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
sum
mat
proba
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
proba
r
mat
=
zipWith
(
/
)
mat
(
mkSum
r
mat
)
-- divByDiag
divByDiag
::
Dim
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
divByDiag
d
mat
=
zipWith
(
/
)
mat
(
replicate
(
constant
(
Z
:.
(
d
::
Int
)
:.
All
))
$
diag
mat
)
where
diag
::
Elt
e
=>
Acc
(
Matrix
e
)
->
Acc
(
Vector
e
)
diag
m
=
backpermute
(
indexTail
(
shape
m
))
(
lift1
(
\
(
Z
:.
x
)
->
(
Z
:.
x
:.
(
x
::
Exp
Int
))))
m
-----------------------------------------------------------------------
mkSum
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
fold
(
+
)
0
mat
miniMax
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
-- | Conditional distance (basic version)
conditional
::
Matrix
Int
->
Matrix
Double
conditional
m
=
run
(
miniMax
$
proba
(
dim
m
)
$
map
fromIntegral
$
use
m
)
type
Matrix'
a
=
Acc
(
Matrix
a
)
type
InclusionExclusion
=
Double
type
SpecificityGenericity
=
Double
conditional
::
Matrix
Double
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional
m
=
(
run
$
ie
(
use
m
),
run
$
sg
(
use
m
))
-- | Conditional distance (advanced version)
conditional'
::
Matrix
Int
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional'
m
=
(
run
$
ie
$
map
fromIntegral
$
use
m
,
run
$
sg
$
map
fromIntegral
$
use
m
)
where
ie
::
Matrix'
Double
->
Matrix'
Double
ie
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ie
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
+
)
(
xs
mat
)
(
ys
mat
)
sg
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
sg
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
-
)
(
xs
mat
)
(
ys
mat
)
...
...
@@ -101,27 +118,21 @@ conditional m = (run $ ie (use m), run $ sg (use m))
n
::
Exp
Double
n
=
P
.
fromIntegral
r
r
::
Rank
r
=
rank'
m
r
::
Dim
r
=
dim
m
xs
::
Matrix'
Double
->
Matrix'
Double
xs
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
xs
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
proba
r
mat
)
ys
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ys
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
transpose
$
proba
r
mat
)
-- filter with threshold
-----------------------------------------------------------------------
-- | Distributional Distance
distributional
::
Matrix
Int
->
Matrix
Double
distributional
m
=
run
$
filter
$
ri
(
map
fromIntegral
$
use
m
)
distributional
m
=
run
$
miniMax
$
ri
(
map
fromIntegral
$
use
m
)
where
n
=
rank'
m
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
n
=
dim
m
filter
m
=
zipWith
(
\
a
b
->
max
a
b
)
m
(
transpose
m
)
...
...
@@ -139,6 +150,98 @@ distributional m = run $ filter $ ri (map fromIntegral $ use m)
cross
mat
=
zipWith
(
-
)
(
mkSum
n
mat
)
(
mat
)
int2double
::
Matrix
Int
->
Matrix
Double
int2double
m
=
run
(
map
fromIntegral
$
use
m
)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
{-
Metric Specificity and genericity: select terms
let N termes
Ni : occ de i
Nij : cooc i et j
Probability to get i given j : P(i|j)=Nij/Nj
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
Inclusion (i) = Gen(i)+Spec(i)
Genericity score = Gen(i)- Spec(i)
References:
* Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
-}
type
InclusionExclusion
=
Double
type
SpecificityGenericity
=
Double
data
SquareMatrix
=
SymetricMatrix
|
NonSymetricMatrix
type
SymetricMatrix
=
Matrix
type
NonSymetricMatrix
=
Matrix
incExcSpeGen
::
Matrix
Int
->
(
Vector
InclusionExclusion
,
Vector
SpecificityGenericity
)
incExcSpeGen
m
=
(
run'
inclusionExclusion
m
,
run'
specificityGenericity
m
)
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
--
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
cardN
::
Exp
Double
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
p_ij
::
(
Elt
e
,
P
.
Fractional
(
Exp
e
))
=>
Acc
(
SymetricMatrix
e
)
->
Acc
(
Matrix
e
)
p_ij
m
=
zipWith
(
/
)
m
(
n_jj
m
)
where
n_jj
::
Elt
e
=>
Acc
(
SymetricMatrix
e
)
->
Acc
(
Matrix
e
)
n_jj
m
=
backpermute
(
shape
m
)
(
lift1
(
\
(
Z
:.
(
_
::
Exp
Int
)
:.
(
j
::
Exp
Int
))
->
(
Z
:.
j
:.
j
)
)
)
m
-- | P(j|i) = Nij /N(ii) Probability to get i given j
-- to test
p_ji
::
(
Elt
e
,
P
.
Fractional
(
Exp
e
))
=>
Acc
(
Array
DIM2
e
)
->
Acc
(
Array
DIM2
e
)
p_ji
=
transpose
.
p_ij
-- | Step to ckeck the result in visual/qualitative tests
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
m
=
run'
pro
m
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pro
mat
=
p_ji
mat
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writtings:
p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ m = zipWith (/) m (n_ m)
where
n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_ m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
-> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
)
) m
-}
src/Gargantext/Viz/Graph/Index.hs
View file @
d710b723
...
...
@@ -39,6 +39,8 @@ import qualified Data.Set as S
import
Data.Map
(
Map
)
import
qualified
Data.Map.Strict
as
M
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
type
Index
=
Int
...
...
@@ -50,7 +52,7 @@ score :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
->
Map
(
t
,
t
)
Double
score
f
m
=
fromIndex
fromI
.
mat2map
.
f
$
cooc2mat
toI
m
where
(
toI
,
fromI
)
=
createInd
ex
es
m
(
toI
,
fromI
)
=
createInd
ic
es
m
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
...
...
@@ -79,21 +81,30 @@ toIndex ni ns = indexConversion ni ns
fromIndex
::
Ord
t
=>
Map
Index
t
->
Map
(
Index
,
Index
)
a
->
Map
(
t
,
t
)
a
fromIndex
ni
ns
=
indexConversion
ni
ns
---------------------------------------------------------------------------------
indexConversion
::
(
Ord
b
,
Ord
k
)
=>
Map
k
b
->
Map
(
k
,
k
)
a
->
Map
(
b
,
b
)
a
indexConversion
index
ms
=
M
.
fromList
$
map
(
\
((
k1
,
k2
),
c
)
->
(
((
M
.!
)
index
k1
,
(
M
.!
)
index
k2
),
c
))
(
M
.
toList
ms
)
---------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
createIndexes
::
Ord
t
=>
Map
(
t
,
t
)
b
->
(
Map
t
Index
,
Map
Index
t
)
createIndexes
=
set2indexes
.
cooc2set
-- TODO
fromIndex'
::
Ord
t
=>
Vector
t
->
Map
(
Index
,
Index
)
a
->
Map
(
t
,
t
)
a
fromIndex'
vi
ns
=
undefined
-- TODO
createIndices'
::
Ord
t
=>
Map
(
t
,
t
)
b
->
(
Map
t
Index
,
Vector
t
)
createIndices'
=
undefined
createIndices
::
Ord
t
=>
Map
(
t
,
t
)
b
->
(
Map
t
Index
,
Map
Index
t
)
createIndices
=
set2indices
.
map2set
where
cooc
2set
::
Ord
t
=>
Map
(
t
,
t
)
a
->
Set
t
cooc
2set
cs'
=
foldl'
(
\
s
((
t1
,
t2
),
_
)
->
insert
[
t1
,
t2
]
s
)
S
.
empty
(
M
.
toList
cs'
)
map
2set
::
Ord
t
=>
Map
(
t
,
t
)
a
->
Set
t
map
2set
cs'
=
foldl'
(
\
s
((
t1
,
t2
),
_
)
->
insert
[
t1
,
t2
]
s
)
S
.
empty
(
M
.
toList
cs'
)
where
insert
as
s
=
foldl'
(
\
s'
t
->
S
.
insert
t
s'
)
s
as
set2ind
ex
es
::
Ord
t
=>
Set
t
->
(
Map
t
Index
,
Map
Index
t
)
set2ind
ex
es
s
=
(
M
.
fromList
toIndex'
,
M
.
fromList
fromIndex'
)
set2ind
ic
es
::
Ord
t
=>
Set
t
->
(
Map
t
Index
,
Map
Index
t
)
set2ind
ic
es
s
=
(
M
.
fromList
toIndex'
,
M
.
fromList
fromIndex'
)
where
fromIndex'
=
zip
[
0
..
]
xs
toIndex'
=
zip
xs
[
0
..
]
...
...
stack.yaml
View file @
d710b723
...
...
@@ -14,7 +14,7 @@ extra-deps:
-
git
:
https://github.com/delanoe/servant-static-th.git
commit
:
fff77e79fe94d563ab5cae2609b78c17b5c1f434
-
accelerate-1.2.0.0
-
accelerate-io
-1.2.0.0
-
hashtables-1.2.3.0
# needed by accelerate
-1.2.0.0
-
aeson-1.2.4.0
-
aeson-lens-0.5.0.0
-
duckling-0.1.3.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment