Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
be6b4469
Commit
be6b4469
authored
Jun 06, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[SCORE] spegen quality tested, need to add test in comments.
parent
c50be323
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
82 additions
and
54 deletions
+82
-54
Pipeline.hs
src/Gargantext/Pipeline.hs
+5
-4
Metrics.hs
src/Gargantext/Text/Metrics.hs
+42
-31
Count.hs
src/Gargantext/Text/Metrics/Count.hs
+1
-1
En.hs
src/Gargantext/Text/Terms/Multi/Lang/En.hs
+2
-2
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+32
-16
No files found.
src/Gargantext/Pipeline.hs
View file @
be6b4469
...
...
@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import
Gargantext.Viz.Graph.Distances.Matrice
(
conditional'
,
conditional
)
import
Gargantext.Viz.Graph.Index
(
Index
)
import
Gargantext.Text.Metrics.Count
(
cooc
,
removeApax
)
import
Gargantext.Text.Metrics
(
incExcSpeGen
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
,
Mono
),
extractTerms
)
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
...
...
@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import
Data.Array.Accelerate
(
Matrix
)
filterMat
::
Matrix
Int
->
[(
Index
,
Index
)]
filterMat
m
=
S
.
toList
$
S
.
take
n
$
S
.
fromList
$
(
L
.
take
nIe
incExc'
)
<>
(
L
.
take
nSg
speGen'
)
where
...
...
@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take
pipeline
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
3
)
text
let
contexts
=
splitBy
(
Sentences
5
)
text
myterms
<-
extractTerms
Multi
FR
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
let
(
ti
,
fi
)
=
createIndices
myCooc
pure
ti
--
let (ti, fi) = createIndices myCooc
pure
$
incExcSpeGen
myCooc
-- Cooc -> Matrix
-- -- filter by spec/gen (dynmaic programming)
...
...
src/Gargantext/Text/Metrics.hs
View file @
be6b4469
...
...
@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Metrics
where
module
Gargantext.Text.Metrics
where
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.Text
as
T
import
Data.List
(
concat
)
import
Data.Map
(
Map
)
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
both
)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import
Data.Array.Accelerate
(
toList
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.Count
(
occurrences
,
cooc
)
import
Gargantext.Text.Terms
(
TermType
(
Multi
),
terms
)
import
Gargantext.Text.Terms
(
TermType
(
M
onoM
ulti
),
terms
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Types
(
Terms
(
..
))
import
Gargantext.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Viz.Graph.Distances.Matrice
import
Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
metrics_text
::
Text
metrics_text
=
T
.
intercalate
" "
[
"A table is an object."
,
"A glas is an object too."
,
"Using a glas to dring is a function."
,
"Using a spoon to eat is a function."
,
"The spoon is an object to eat."
]
metrics_text
=
T
.
intercalate
" "
metrics_sentences
metrics_sentences'
::
[
Text
]
metrics_sentences'
=
splitBy
(
Sentences
0
)
metrics_text
-- | Sentences
metrics_sentences
::
[
Text
]
metrics_sentences
=
[
"A table is an object
."
,
"A glas is an object too
."
,
"The glas and the spoon are on the
table."
,
"The spoon is an object to eat
."
,
"The spoon is on the table and the plate and the glas."
]
metrics_sentences
=
[
"There is a table with a glass of wine and a spoon
."
,
"I can see the glass on the table
."
,
"There was a spoon on that
table."
,
"The glass just fall from the table, pouring wine elsewhere
."
,
"I wish the glass did not contain wine."
]
metrics_sentences_Test
=
metrics_sentences
==
metrics_sentences'
-- | Terms reordered to visually check occurrences
metrics_terms
::
[[
Text
]]
metrics_terms
=
undefined
metrics_terms'
::
IO
[[
Terms
]]
metrics_terms'
=
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
--metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
metrics_terms
::
IO
[[
Terms
]]
metrics_terms
=
mapM
(
terms
MonoMulti
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
-- | Occurrences
{-
...
...
@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ
=
occurrences
<$>
concat
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_occ
=
occurrences
<$>
L
.
concat
<$>
metrics_terms
{-
-- fromList [((["glas"],["object"]),6)
...
...
@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc
=
cooc
<$>
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
metrics_text
)
metrics_cooc'
=
(
mapM
(
terms
Multi
EN
)
$
splitBy
(
Sentences
0
)
"The table object. The table object."
)
metrics_cooc
=
cooc
<$>
metrics_terms
metrics_cooc_mat
=
do
m
<-
metrics_cooc
let
(
ti
,
_
)
=
createIndices
m
let
mat
=
cooc2mat
ti
m
pure
(
ti
,
mat
,
incExcSpeGen_proba
mat
,
incExcSpeGen'
mat
)
metrics_incExcSpeGen
=
incExcSpeGen
<$>
metrics_cooc
incExcSpeGen
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen
m
=
both
(
\
x
->
L
.
reverse
$
L
.
sortOn
snd
$
zip
(
map
snd
$
M
.
toList
fi
)
(
toList
x
)
)
(
incExcSpeGen'
$
cooc2mat
ti
m
)
where
(
ti
,
fi
)
=
createIndices
m
src/Gargantext/Text/Metrics/Count.hs
View file @
be6b4469
...
...
@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
xs
=
[
((
x
,
y
),
1
)
|
x
<-
ts'
,
y
<-
ts'
,
x
<
y
-- , x /=
y
]
...
...
src/Gargantext/Text/Terms/Multi/Lang/En.hs
View file @
be6b4469
...
...
@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group
[]
=
[]
group
ntags
=
group2
NP
NP
$
group2
NP
VB
$
group2
NP
IN
--
$ group2 NP IN
$
group2
IN
DT
$
group2
VB
NP
--
$ group2 VB NP
$
group2
JJ
NP
$
group2
JJ
JJ
$
group2
JJ
CC
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
be6b4469
...
...
@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba
r
mat
=
zipWith
(
/
)
mat
(
mkSum
r
mat
)
mkSum
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
fold
(
+
)
0
mat
mkSum
r
mat
=
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
sum
mat
divByDiag
::
Rank
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
divByDiag
r
mat
=
zipWith
(
/
)
mat
(
replicate
(
constant
(
Z
:.
(
r
::
Int
)
:.
All
))
$
diag
mat
)
diag
::
forall
e
.
Elt
e
=>
Acc
(
Matrix
e
)
->
Acc
(
Vector
e
)
diag
m
=
backpermute
(
indexTail
(
shape
m
))
(
lift1
(
\
(
Z
:.
x
)
->
(
Z
:.
x
:.
(
x
::
Exp
Int
))))
(
m
::
Acc
(
Array
DIM2
e
))
type
Matrix'
a
=
Acc
(
Matrix
a
)
...
...
@@ -90,7 +95,7 @@ type InclusionExclusion = Double
type
SpecificityGenericity
=
Double
miniMax
::
Matrix'
Double
->
Matrix'
Double
miniMax
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
...
...
@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double
int2double
m
=
run
(
map
fromIntegral
$
use
m
)
{-
Metric Specificity and genericty: select terms
Metric Specificity and generic
i
ty: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
...
...
@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
-}
incExcSpeGen
::
Matrix
Int
->
(
Vector
Double
,
Vector
Double
)
incExcSpeGen
m
=
(
run'
ie
m
,
run'
sg
m
)
incExcSpeGen'
::
Matrix
Int
->
(
Vector
Double
,
Vector
Double
)
incExcSpeGen'
m
=
(
run'
ie
m
,
run'
sg
m
)
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pV
::
Matrix'
Double
->
Acc
(
Vector
Double
)
pV
mat
=
sum
$
proba
(
rank'
m
)
mat
pH
::
Matrix'
Double
->
Acc
(
Vector
Double
)
pH
mat
=
sum
$
transpose
$
proba
(
rank'
m
)
mat
ie
::
Matrix'
Double
->
Acc
(
Vector
Double
)
ie
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
ie
mat
=
zipWith
(
-
)
(
pV
mat
)
(
pH
mat
)
sg
::
Matrix'
Double
->
Acc
(
Vector
Double
)
--
sg
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
sg
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pH
mat
)
n
::
Exp
Double
n
=
constant
(
P
.
fromIntegral
(
rank'
m
-
1
)
::
Double
)
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
n
)
$
sum
$
divByDiag
(
rank'
m
)
mat
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
n
)
$
sum
$
transpose
$
divByDiag
(
rank'
m
)
mat
incExcSpeGen_proba
::
Matrix
Int
->
Matrix
Double
incExcSpeGen_proba
m
=
run'
pro
m
where
run'
fun
mat
=
run
$
fun
$
map
fromIntegral
$
use
mat
pro
mat
=
divByDiag
(
rank'
m
)
mat
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment