Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
c93b59c9
Unverified
Commit
c93b59c9
authored
Jun 06, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve more tests
parent
badd865e
Pipeline
#435
canceled with stage
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
70 additions
and
30 deletions
+70
-30
Eleve.hs
src/Gargantext/Text/Eleve.hs
+70
-30
No files found.
src/Gargantext/Text/Eleve.hs
View file @
c93b59c9
...
...
@@ -37,6 +37,7 @@ Notes for current implementation:
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module
Gargantext.Text.Eleve
where
...
...
@@ -69,7 +70,7 @@ type Entropy e =
-- | Example and tests for development
data
I
e
=
I
{
_info_entropy
::
e
,
_info_
norm_entropy
::
e
,
_info_
autonomy
::
e
}
instance
Show
e
=>
Show
(
I
e
)
where
...
...
@@ -321,24 +322,14 @@ mainEleve n input = map unToken . split identity (t :: Trie Token Double) <$> in
where
inp = toToken (n - 1) <$> input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-- NP: here we use the entropy to split
-- instead we should use either:
-- info_norm_entropy or info_norm_entropy'
-- However they should first be fixed.
-}
testEleve
::
Bool
->
Int
->
[
Text
]
->
IO
Bool
testEleve
debug
n
output
=
do
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
)]
->
IO
Bool
testEleve
debug
n
output
checks
=
do
let
out
=
T
.
words
<$>
output
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
(
n
-
1
)
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlong
n
1
<$>
inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
setNormEntropy
(
t
::
Trie
Token
Double
)
{-
pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
| ps <- L.nub $ [ c
...
...
@@ -350,20 +341,42 @@ testEleve debug n output = do
-}
--res = map unToken . split identity fwd <$> inp
--res = map unToken . split info_norm_entropy' nt' <$> inp
res
=
map
unToken
.
split
info_
norm_entrop
y
nt
<$>
inp
res
=
map
unToken
.
split
info_
autonom
y
nt
<$>
inp
when
debug
$
do
P
.
putStrLn
(
show
input
)
-- mapM_ (P.putStrLn . show) pss
P
.
putStrLn
""
printTrie
nt
{-
-- printTrie nt
printTrie
(
_fwd
nt
)
printTrie
(
_bwd
nt
)
-}
P
.
putStrLn
$
show
res
forM_
checks
checker
pure
$
expected
==
res
where
out
=
T
.
words
<$>
output
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
(
n
-
1
)
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlong
n
1
<$>
inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
setNormEntropy
t
check
f
msg
x
y
=
if
f
x
y
then
P
.
putStrLn
$
" PASS "
<>
msg
<>
" "
<>
show
x
<>
" ~= "
<>
show
y
else
P
.
putStrLn
$
" FAIL "
<>
msg
<>
" "
<>
show
x
<>
" /= "
<>
show
y
checker
(
ngram
,
count
,
entropy
,
_ev
,
autonomy
,
bwd_entropy
,
fwd_entropy
)
=
do
let
t'
=
findTrie
(
NonTerminal
<$>
T
.
words
ngram
)
nt
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy"
entropy
(
nodeEntropy
info_entropy
t'
)
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
t'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
info_entropy
(
_bwd
t'
))
printTrie
=
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
...
...
@@ -384,17 +397,44 @@ example6 = ["le-petit chat"
,
"le gros rat"
]
checks0
,
checks2
::
[(
Text
,
Int
,
Double
,
Double
,
Double
,
Double
,
Double
)]
checks0
=
[(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
,(
"York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
,(
"York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
]
checks2
=
[(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be or"
,
2
,
0.5
,
nan
,
nan
,
nan
,
1.0
)
,(
"or not"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"not to"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be or"
,
2
,
0.5
,
nan
,
nan
,
nan
,
1.0
)
,(
"or NOT"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"NOT to"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
]
runTests
::
IO
()
runTests
=
forM_
[(
"example0"
,
2
,
example0
)
,(
"example1"
,
2
,
example1
)
,(
"example2"
,
3
,
example2
)
,(
"example3"
,
2
,
example3
)
,(
"example4"
,
4
,
example4
)
,(
"example5"
,
5
,
example5
)
[(
"example0"
,
2
,
example0
,
checks0
)
,(
"example1"
,
2
,
example1
,
[]
)
,(
"example2"
,
3
,
example2
,
checks2
)
,(
"example3"
,
2
,
example3
,
[]
)
,(
"example4"
,
4
,
example4
,
[]
)
,(
"example5"
,
5
,
example5
,
[]
)
]
(
\
(
name
,
n
,
ex
)
->
do
b
<-
testEleve
False
n
ex
P
.
putStrLn
$
name
<>
" "
<>
show
n
<>
" "
<>
if
b
then
"PASS"
else
"FAIL"
(
\
(
name
,
n
,
ex
,
checks
)
->
do
P
.
putStrLn
$
name
<>
" "
<>
show
n
b
<-
testEleve
False
n
ex
checks
P
.
putStrLn
$
" splitting: "
<>
if
b
then
"PASS"
else
"FAIL"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment