Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
198
Issues
198
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
be6b4469
Commit
be6b4469
authored
Jun 06, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[SCORE] spegen quality tested, need to add test in comments.
parent
c50be323
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
82 additions
and
54 deletions
+82
-54
Pipeline.hs
src/Gargantext/Pipeline.hs
+5
-4
Metrics.hs
src/Gargantext/Text/Metrics.hs
+42
-31
Count.hs
src/Gargantext/Text/Metrics/Count.hs
+1
-1
En.hs
src/Gargantext/Text/Terms/Multi/Lang/En.hs
+2
-2
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+32
-16
No files found.
src/Gargantext/Pipeline.hs
View file @
be6b4469
...
...
@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics
(
incExcSpeGen
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
...
...
@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
...
...
@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take
pipeline
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
3
)
text
let
contexts
=
splitBy
(
Sentences
5
)
text
myterms
<-
extractTerms
Multi
FR
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
let
(
ti
,
fi
)
=
createIndices
myCooc
pure
ti
--
let (ti, fi) = createIndices myCooc
pure
$
incExcSpeGen
myCooc
-- Cooc -> Matrix
-- -- filter by spec/gen (dynmaic programming)
...
...
src/Gargantext/Text/Metrics.hs
View file @
be6b4469
...
...
@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Metrics
where
module
Gargantext.Text.Metrics
where
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.Text
as
T
import
Data.List
(
concat
)
import
Data.Map
(
Map
)
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
both
)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import
Data.Array.Accelerate
(
toList
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.Count
(
occurrences
,
cooc
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
),
terms
)
import
Gargantext.Text.Terms
(
TermType
(
M
onoM
ulti
),
terms
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
metrics_text
::
Text
metrics_text
=
T
.
intercalate
" "
[
"A table is an object."
,
"A glas is an object too."
,
"Using a glas to dring is a function."
,
"Using a spoon to eat is a function."
,
"The spoon is an object to eat."
]
metrics_text
=
T
.
intercalate
" "
metrics_sentences
metrics_sentences'
::
[
Text
]
metrics_sentences'
=
splitBy
(
Sentences
0
)
metrics_text
-- | Sentences
metrics_sentences
::
[
Text
]
metrics_sentences
=
[
"A table is an object
."
,
"A glas is an object too
."
,
"The glas and the spoon are on the
table."
,
"The spoon is an object to eat
."
,
"The spoon is on the table and the plate and the glas."
]
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon
."
,
"I can see the glass on the table
."
,
"There was a spoon on that
table."
,
"The glass just fall from the table, pouring wine elsewhere
."
,
"I wish the glass did not contain wine."
]
metrics_sentences_Test
=
metrics_sentences
==
metrics_sentences'
-- | Terms reordered to visually check occurrences
metrics_terms
::
[[
Text
]]
metrics_terms
=
undefined
metrics_terms'
::
IO
[[
Terms
]]
metrics_terms'
=
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
--metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
metrics_terms
::
IO
[[
Terms
]]
metrics_terms
=
mapM
(
terms
MonoMulti
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
-- | Occurrences
{-
...
...
@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ
=
occurrences
<$>
concat
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_occ
=
occurrences
<$>
L
.
concat
<$>
metrics_terms
{-
-- fromList [((["glas"],["object"]),6)
...
...
@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc
=
cooc
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_cooc'
=
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
"The table object. The table object."
)
metrics_cooc
=
cooc
<$>
metrics_terms
metrics_cooc_mat
=
do
m
<-
metrics_cooc
let
(
ti
,
_
)
=
createIndices
m
let
mat
=
cooc2mat
ti
m
pure
(
ti
,
mat
,
incExcSpeGen_proba
mat
,
incExcSpeGen'
mat
)
metrics_incExcSpeGen
=
incExcSpeGen
<$>
metrics_cooc
incExcSpeGen
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen
m
=
both
(
\
x
->
L
.
reverse
$
L
.
sortOn
snd
$
zip
(
map
snd
$
M
.
toList
fi
)
(
toList
x
)
)
(
incExcSpeGen'
$
cooc2mat
ti
m
)
where
(
ti
,
fi
)
=
createIndices
m
src/Gargantext/Text/Metrics/Count.hs
View file @
be6b4469
...
...
@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
xs
=
[
((
x
,
y
),
1
)
|
x
<-
ts'
,
y
<-
ts'
,
x
<
y
-- , x /=
y
]
...
...
src/Gargantext/Text/Terms/Multi/Lang/En.hs
View file @
be6b4469
...
...
@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group
[]
=
[]
group
ntags
=
group2
NP
NP
$
group2
NP
VB
$
group2
NP
IN
--
$ group2 NP IN
$
group2
IN
DT
$
group2
VB
NP
--
$ group2 VB NP
$
group2
JJ
NP
$
group2
JJ
JJ
$
group2
JJ
CC
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
be6b4469
...
...
@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba
r
mat
=
zipWith
(
/
)
mat
(
mkSum
r
mat
)
mkSum
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
fold
(
+
)
0
mat
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
sum
mat
divByDiag
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
divByDiag
r
mat
=
zipWith
(
/
)
mat
(
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
diag
mat
)
diag
::
forall
e
.
Elt
e
=>
Acc
(
Matrix
e
)
->
Acc
(
Vector
e
)
diag
m
=
backpermute
(
indexTail
(
shape
m
))
(
lift1
(
\
(
Z
:.
x
)
->
(
Z
:.
x
:.
(
x
::
Exp
Int
))))
(
m
::
Acc
(
Array
DIM2
e
))
type
Matrix'
a
=
Acc
(
Matrix
a
)
...
...
@@ -90,7 +95,7 @@ type InclusionExclusion = Double
type
SpecificityGenericity
=
Double
miniMax
::
Matrix'
Double
->
Matrix'
Double
miniMax
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
...
...
@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double
int2double
m
=
run
(
map
fromIntegral
$
use
m
)
{-
Metric Specificity and genericty: select terms
Metric Specificity and generic
i
ty: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
...
...
@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
-}
incExcSpeGen
::
Matrix
Int
->
(
Vector
Double
,
Vector
Double
)
incExcSpeGen
m
=
(
run'
ie
m
,
run'
sg
m
)
incExcSpeGen'
::
Matrix
Int
->
(
Vector
Double
,
Vector
Double
)
incExcSpeGen'
m
=
(
run'
ie
m
,
run'
sg
m
)
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pV
::
Matrix'
Double
->
Acc
(
Vector
Double
)
pV
mat
=
sum
$
proba
(
rank'
m
)
mat
pH
::
Matrix'
Double
->
Acc
(
Vector
Double
)
pH
mat
=
sum
$
transpose
$
proba
(
rank'
m
)
mat
ie
::
Matrix'
Double
->
Acc
(
Vector
Double
)
ie
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
ie
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
sg
::
Matrix'
Double
->
Acc
(
Vector
Double
)
--
sg
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
sg
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
n
::
Exp
Double
n
=
constant
(
P
.
fromIntegral
(
rank'
m
-
1
)
::
Double
)
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
n
)
$
sum
$
divByDiag
(
rank'
m
)
mat
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
n
)
$
sum
$
transpose
$
divByDiag
(
rank'
m
)
mat
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
m
=
run'
pro
m
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pro
mat
=
divByDiag
(
rank'
m
)
mat
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment