Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
be6b4469
Commit
be6b4469
authored
Jun 06, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[SCORE] spegen quality tested, need to add test in comments.
parent
c50be323
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
82 additions
and
54 deletions
+82
-54
Pipeline.hs
src/Gargantext/Pipeline.hs
+5
-4
Metrics.hs
src/Gargantext/Text/Metrics.hs
+42
-31
Count.hs
src/Gargantext/Text/Metrics/Count.hs
+1
-1
En.hs
src/Gargantext/Text/Terms/Multi/Lang/En.hs
+2
-2
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+32
-16
No files found.
src/Gargantext/Pipeline.hs
View file @
be6b4469
...
@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
...
@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics
(
incExcSpeGen
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
...
@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
...
@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- . fromIndex fi $ filterMat $ cooc2mat ti m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
where
...
@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take
...
@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take
pipeline
path
=
do
pipeline
path
=
do
-- Text <- IO Text <- FilePath
-- Text <- IO Text <- FilePath
text
<-
readFile
path
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
3
)
text
let
contexts
=
splitBy
(
Sentences
5
)
text
myterms
<-
extractTerms
Multi
FR
contexts
myterms
<-
extractTerms
Multi
FR
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
let
myCooc
=
removeApax
$
cooc
myterms
let
(
ti
,
fi
)
=
createIndices
myCooc
--
let (ti, fi) = createIndices myCooc
pure
ti
pure
$
incExcSpeGen
myCooc
-- Cooc -> Matrix
-- Cooc -> Matrix
-- -- filter by spec/gen (dynmaic programming)
-- -- filter by spec/gen (dynmaic programming)
...
...
src/Gargantext/Text/Metrics.hs
View file @
be6b4469
...
@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@
...
@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Metrics
where
module
Gargantext.Text.Metrics
where
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.Text
as
T
import
Data.Map
(
Map
)
import
Data.List
(
concat
)
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
both
)
--import GHC.Real (Ratio)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
--import qualified Data.Text.Metrics as DTM
import
Data.Array.Accelerate
(
toList
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.Count
(
occurrences
,
cooc
)
import
Gargantext.Text.Metrics.Count
(
occurrences
,
cooc
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
),
terms
)
import
Gargantext.Text.Terms
(
TermType
(
M
onoM
ulti
),
terms
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
--noApax m = M.filter (>1) m
metrics_text
::
Text
metrics_text
::
Text
metrics_text
=
T
.
intercalate
" "
[
"A table is an object."
metrics_text
=
T
.
intercalate
" "
metrics_sentences
,
"A glas is an object too."
,
"Using a glas to dring is a function."
,
"Using a spoon to eat is a function."
,
"The spoon is an object to eat."
]
metrics_sentences'
::
[
Text
]
metrics_sentences'
::
[
Text
]
metrics_sentences'
=
splitBy
(
Sentences
0
)
metrics_text
metrics_sentences'
=
splitBy
(
Sentences
0
)
metrics_text
-- | Sentences
-- | Sentences
metrics_sentences
::
[
Text
]
metrics_sentences
::
[
Text
]
metrics_sentences
=
[
"A table is an object
."
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon
."
,
"A glas is an object too
."
,
"I can see the glass on the table
."
,
"The glas and the spoon are on the
table."
,
"There was a spoon on that
table."
,
"The spoon is an object to eat
."
,
"The glass just fall from the table, pouring wine elsewhere
."
,
"The spoon is on the table and the plate and the glas."
]
,
"I wish the glass did not contain wine."
]
metrics_sentences_Test
=
metrics_sentences
==
metrics_sentences'
metrics_sentences_Test
=
metrics_sentences
==
metrics_sentences'
-- | Terms reordered to visually check occurrences
-- | Terms reordered to visually check occurrences
metrics_terms
::
[[
Text
]]
metrics_terms
::
IO
[[
Terms
]]
metrics_terms
=
undefined
metrics_terms
=
mapM
(
terms
MonoMulti
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
metrics_terms'
::
IO
[[
Terms
]]
metrics_terms'
=
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
--metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
-- | Occurrences
-- | Occurrences
{-
{-
...
@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
...
@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
-}
metrics_occ
=
occurrences
<$>
concat
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_occ
=
occurrences
<$>
L
.
concat
<$>
metrics_terms
{-
{-
-- fromList [((["glas"],["object"]),6)
-- fromList [((["glas"],["object"]),6)
...
@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente
...
@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
-}
metrics_cooc
=
cooc
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_cooc
=
cooc
<$>
metrics_terms
metrics_cooc_mat
=
do
metrics_cooc'
=
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
"The table object. The table object."
)
m
<-
metrics_cooc
let
(
ti
,
_
)
=
createIndices
m
let
mat
=
cooc2mat
ti
m
pure
(
ti
,
mat
,
incExcSpeGen_proba
mat
,
incExcSpeGen'
mat
)
metrics_incExcSpeGen
=
incExcSpeGen
<$>
metrics_cooc
incExcSpeGen
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen
m
=
both
(
\
x
->
L
.
reverse
$
L
.
sortOn
snd
$
zip
(
map
snd
$
M
.
toList
fi
)
(
toList
x
)
)
(
incExcSpeGen'
$
cooc2mat
ti
m
)
where
(
ti
,
fi
)
=
createIndices
m
src/Gargantext/Text/Metrics/Count.hs
View file @
be6b4469
...
@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
...
@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
xs
=
[
((
x
,
y
),
1
)
xs
=
[
((
x
,
y
),
1
)
|
x
<-
ts'
|
x
<-
ts'
,
y
<-
ts'
,
y
<-
ts'
,
x
<
y
-- , x /=
y
]
]
...
...
src/Gargantext/Text/Terms/Multi/Lang/En.hs
View file @
be6b4469
...
@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
...
@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group
[]
=
[]
group
[]
=
[]
group
ntags
=
group2
NP
NP
group
ntags
=
group2
NP
NP
$
group2
NP
VB
$
group2
NP
VB
$
group2
NP
IN
--
$ group2 NP IN
$
group2
IN
DT
$
group2
IN
DT
$
group2
VB
NP
--
$ group2 VB NP
$
group2
JJ
NP
$
group2
JJ
NP
$
group2
JJ
JJ
$
group2
JJ
JJ
$
group2
JJ
CC
$
group2
JJ
CC
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
be6b4469
...
@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
...
@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba
r
mat
=
zipWith
(
/
)
mat
(
mkSum
r
mat
)
proba
r
mat
=
zipWith
(
/
)
mat
(
mkSum
r
mat
)
mkSum
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
mkSum
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
sum
mat
$
fold
(
+
)
0
mat
divByDiag
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
divByDiag
r
mat
=
zipWith
(
/
)
mat
(
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
diag
mat
)
diag
::
forall
e
.
Elt
e
=>
Acc
(
Matrix
e
)
->
Acc
(
Vector
e
)
diag
m
=
backpermute
(
indexTail
(
shape
m
))
(
lift1
(
\
(
Z
:.
x
)
->
(
Z
:.
x
:.
(
x
::
Exp
Int
))))
(
m
::
Acc
(
Array
DIM2
e
))
type
Matrix'
a
=
Acc
(
Matrix
a
)
type
Matrix'
a
=
Acc
(
Matrix
a
)
...
@@ -90,7 +95,7 @@ type InclusionExclusion = Double
...
@@ -90,7 +95,7 @@ type InclusionExclusion = Double
type
SpecificityGenericity
=
Double
type
SpecificityGenericity
=
Double
miniMax
::
Matrix'
Double
->
Matrix'
Double
miniMax
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
miniMax'
=
(
the
$
minimum
$
maximum
m
)
...
@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double
...
@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double
int2double
m
=
run
(
map
fromIntegral
$
use
m
)
int2double
m
=
run
(
map
fromIntegral
$
use
m
)
{-
{-
Metric Specificity and genericty: select terms
Metric Specificity and generic
i
ty: select terms
Compute genericity/specificity:
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
P(i|j) = N(ij) / N(jj)
...
@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms
...
@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms
Gen(i) = Mean{j} P(j_k|i)
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
-}
-}
incExcSpeGen
::
Matrix
Int
->
(
Vector
Double
,
Vector
Double
)
incExcSpeGen
m
=
(
run'
ie
m
,
run'
sg
m
)
incExcSpeGen'
::
Matrix
Int
->
(
Vector
Double
,
Vector
Double
)
incExcSpeGen'
m
=
(
run'
ie
m
,
run'
sg
m
)
where
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pV
::
Matrix'
Double
->
Acc
(
Vector
Double
)
ie
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
sum
$
proba
(
rank'
m
)
mat
pH
::
Matrix'
Double
->
Acc
(
Vector
Double
)
pH
mat
=
sum
$
transpose
$
proba
(
rank'
m
)
mat
ie
::
Matrix'
Double
->
Acc
(
Vector
Double
)
ie
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
ie
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
--
sg
::
Matrix'
Double
->
Acc
(
Vector
Double
)
sg
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
sg
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
sg
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
n
::
Exp
Double
n
=
constant
(
P
.
fromIntegral
(
rank'
m
-
1
)
::
Double
)
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
n
)
$
sum
$
divByDiag
(
rank'
m
)
mat
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
n
)
$
sum
$
transpose
$
divByDiag
(
rank'
m
)
mat
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
m
=
run'
pro
m
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pro
mat
=
divByDiag
(
rank'
m
)
mat
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment