Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
c93b59c9
Unverified
Commit
c93b59c9
authored
Jun 06, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve more tests
parent
badd865e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
70 additions
and
30 deletions
+70
-30
Eleve.hs
src/Gargantext/Text/Eleve.hs
+70
-30
No files found.
src/Gargantext/Text/Eleve.hs
View file @
c93b59c9
...
...
@@ -37,6 +37,7 @@ Notes for current implementation:
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module
Gargantext.Text.Eleve
where
...
...
@@ -68,8 +69,8 @@ type Entropy e =
------------------------------------------------------------------------
-- | Example and tests for development
data
I
e
=
I
{
_info_entropy
::
e
,
_info_
norm_entropy
::
e
{
_info_entropy
::
e
,
_info_
autonomy
::
e
}
instance
Show
e
=>
Show
(
I
e
)
where
...
...
@@ -321,24 +322,14 @@ mainEleve n input = map unToken . split identity (t :: Trie Token Double) <$> in
where
inp = toToken (n - 1) <$> input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-- NP: here we use the entropy to split
-- instead we should use either:
-- info_norm_entropy or info_norm_entropy'
-- However they should first be fixed.
-}
testEleve
::
Bool
->
Int
->
[
Text
]
->
IO
Bool
testEleve
debug
n
output
=
do
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
)]
->
IO
Bool
testEleve
debug
n
output
checks
=
do
let
out
=
T
.
words
<$>
output
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
(
n
-
1
)
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlong
n
1
<$>
inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
setNormEntropy
(
t
::
Trie
Token
Double
)
{-
pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
| ps <- L.nub $ [ c
...
...
@@ -350,20 +341,42 @@ testEleve debug n output = do
-}
--res = map unToken . split identity fwd <$> inp
--res = map unToken . split info_norm_entropy' nt' <$> inp
res
=
map
unToken
.
split
info_
norm_entrop
y
nt
<$>
inp
res
=
map
unToken
.
split
info_
autonom
y
nt
<$>
inp
when
debug
$
do
P
.
putStrLn
(
show
input
)
-- mapM_ (P.putStrLn . show) pss
P
.
putStrLn
""
printTrie
nt
{-
-- printTrie nt
printTrie
(
_fwd
nt
)
printTrie
(
_bwd
nt
)
-}
P
.
putStrLn
$
show
res
forM_
checks
checker
pure
$
expected
==
res
where
out
=
T
.
words
<$>
output
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
(
n
-
1
)
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlong
n
1
<$>
inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
setNormEntropy
t
check
f
msg
x
y
=
if
f
x
y
then
P
.
putStrLn
$
" PASS "
<>
msg
<>
" "
<>
show
x
<>
" ~= "
<>
show
y
else
P
.
putStrLn
$
" FAIL "
<>
msg
<>
" "
<>
show
x
<>
" /= "
<>
show
y
checker
(
ngram
,
count
,
entropy
,
_ev
,
autonomy
,
bwd_entropy
,
fwd_entropy
)
=
do
let
t'
=
findTrie
(
NonTerminal
<$>
T
.
words
ngram
)
nt
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy"
entropy
(
nodeEntropy
info_entropy
t'
)
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
t'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
info_entropy
(
_bwd
t'
))
printTrie
=
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
...
...
@@ -384,17 +397,44 @@ example6 = ["le-petit chat"
,
"le gros rat"
]
checks0
,
checks2
::
[(
Text
,
Int
,
Double
,
Double
,
Double
,
Double
,
Double
)]
checks0
=
[(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
,(
"York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
,(
"York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
]
checks2
=
[(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be or"
,
2
,
0.5
,
nan
,
nan
,
nan
,
1.0
)
,(
"or not"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"not to"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be or"
,
2
,
0.5
,
nan
,
nan
,
nan
,
1.0
)
,(
"or NOT"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"NOT to"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
]
runTests
::
IO
()
runTests
=
forM_
[(
"example0"
,
2
,
example0
)
,(
"example1"
,
2
,
example1
)
,(
"example2"
,
3
,
example2
)
,(
"example3"
,
2
,
example3
)
,(
"example4"
,
4
,
example4
)
,(
"example5"
,
5
,
example5
)
[(
"example0"
,
2
,
example0
,
checks0
)
,(
"example1"
,
2
,
example1
,
[]
)
,(
"example2"
,
3
,
example2
,
checks2
)
,(
"example3"
,
2
,
example3
,
[]
)
,(
"example4"
,
4
,
example4
,
[]
)
,(
"example5"
,
5
,
example5
,
[]
)
]
(
\
(
name
,
n
,
ex
)
->
do
b
<-
testEleve
False
n
ex
P
.
putStrLn
$
name
<>
" "
<>
show
n
<>
" "
<>
if
b
then
"PASS"
else
"FAIL"
(
\
(
name
,
n
,
ex
,
checks
)
->
do
P
.
putStrLn
$
name
<>
" "
<>
show
n
b
<-
testEleve
False
n
ex
checks
P
.
putStrLn
$
" splitting: "
<>
if
b
then
"PASS"
else
"FAIL"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment