Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
197
Issues
197
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
c93b59c9
Unverified
Commit
c93b59c9
authored
Jun 06, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve more tests
parent
badd865e
Pipeline
#435
canceled with stage
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
70 additions
and
30 deletions
+70
-30
Eleve.hs
src/Gargantext/Text/Eleve.hs
+70
-30
No files found.
src/Gargantext/Text/Eleve.hs
View file @
c93b59c9
...
@@ -37,6 +37,7 @@ Notes for current implementation:
...
@@ -37,6 +37,7 @@ Notes for current implementation:
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module
Gargantext.Text.Eleve
where
module
Gargantext.Text.Eleve
where
...
@@ -68,8 +69,8 @@ type Entropy e =
...
@@ -68,8 +69,8 @@ type Entropy e =
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Example and tests for development
-- | Example and tests for development
data
I
e
=
I
data
I
e
=
I
{
_info_entropy
::
e
{
_info_entropy
::
e
,
_info_
norm_entropy
::
e
,
_info_
autonomy
::
e
}
}
instance
Show
e
=>
Show
(
I
e
)
where
instance
Show
e
=>
Show
(
I
e
)
where
...
@@ -321,24 +322,14 @@ mainEleve n input = map unToken . split identity (t :: Trie Token Double) <$> in
...
@@ -321,24 +322,14 @@ mainEleve n input = map unToken . split identity (t :: Trie Token Double) <$> in
where
where
inp = toToken (n - 1) <$> input
inp = toToken (n - 1) <$> input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-- NP: here we use the entropy to split
-- instead we should use either:
-- info_norm_entropy or info_norm_entropy'
-- However they should first be fixed.
-}
-}
testEleve
::
Bool
->
Int
->
[
Text
]
->
IO
Bool
sim
::
Entropy
e
=>
e
->
e
->
Bool
testEleve
debug
n
output
=
do
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
)]
->
IO
Bool
testEleve
debug
n
output
checks
=
do
let
let
out
=
T
.
words
<$>
output
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
(
n
-
1
)
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlong
n
1
<$>
inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
setNormEntropy
(
t
::
Trie
Token
Double
)
{-
{-
pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
| ps <- L.nub $ [ c
| ps <- L.nub $ [ c
...
@@ -350,20 +341,42 @@ testEleve debug n output = do
...
@@ -350,20 +341,42 @@ testEleve debug n output = do
-}
-}
--res = map unToken . split identity fwd <$> inp
--res = map unToken . split identity fwd <$> inp
--res = map unToken . split info_norm_entropy' nt' <$> inp
--res = map unToken . split info_norm_entropy' nt' <$> inp
res
=
map
unToken
.
split
info_
norm_entrop
y
nt
<$>
inp
res
=
map
unToken
.
split
info_
autonom
y
nt
<$>
inp
when
debug
$
do
when
debug
$
do
P
.
putStrLn
(
show
input
)
P
.
putStrLn
(
show
input
)
-- mapM_ (P.putStrLn . show) pss
-- mapM_ (P.putStrLn . show) pss
P
.
putStrLn
""
P
.
putStrLn
""
printTrie
nt
-- printTrie nt
{-
printTrie
(
_fwd
nt
)
printTrie
(
_fwd
nt
)
printTrie
(
_bwd
nt
)
printTrie
(
_bwd
nt
)
-}
P
.
putStrLn
$
show
res
P
.
putStrLn
$
show
res
forM_
checks
checker
pure
$
expected
==
res
pure
$
expected
==
res
where
where
out
=
T
.
words
<$>
output
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
(
n
-
1
)
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlong
n
1
<$>
inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
setNormEntropy
t
check
f
msg
x
y
=
if
f
x
y
then
P
.
putStrLn
$
" PASS "
<>
msg
<>
" "
<>
show
x
<>
" ~= "
<>
show
y
else
P
.
putStrLn
$
" FAIL "
<>
msg
<>
" "
<>
show
x
<>
" /= "
<>
show
y
checker
(
ngram
,
count
,
entropy
,
_ev
,
autonomy
,
bwd_entropy
,
fwd_entropy
)
=
do
let
t'
=
findTrie
(
NonTerminal
<$>
T
.
words
ngram
)
nt
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy"
entropy
(
nodeEntropy
info_entropy
t'
)
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
t'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
info_entropy
(
_bwd
t'
))
printTrie
=
printTrie
=
P
.
putStrLn
.
Tree
.
drawTree
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
.
fmap
show
...
@@ -384,17 +397,44 @@ example6 = ["le-petit chat"
...
@@ -384,17 +397,44 @@ example6 = ["le-petit chat"
,
"le gros rat"
,
"le gros rat"
]
]
checks0
,
checks2
::
[(
Text
,
Int
,
Double
,
Double
,
Double
,
Double
,
Double
)]
checks0
=
[(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
,(
"York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
,(
"York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.414213562373095
,
nan
,
1.584962500721156
)
]
checks2
=
[(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be or"
,
2
,
0.5
,
nan
,
nan
,
nan
,
1.0
)
,(
"or not"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"not to"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be or"
,
2
,
0.5
,
nan
,
nan
,
nan
,
1.0
)
,(
"or NOT"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"NOT to"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"to be"
,
3
,
1.2516291673878228
,
1.2516291673878228
,
1.5535694744293167
,
nan
,
0.9182958340544896
)
,(
"be and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
]
runTests
::
IO
()
runTests
::
IO
()
runTests
=
runTests
=
forM_
forM_
[(
"example0"
,
2
,
example0
)
[(
"example0"
,
2
,
example0
,
checks0
)
,(
"example1"
,
2
,
example1
)
,(
"example1"
,
2
,
example1
,
[]
)
,(
"example2"
,
3
,
example2
)
,(
"example2"
,
3
,
example2
,
checks2
)
,(
"example3"
,
2
,
example3
)
,(
"example3"
,
2
,
example3
,
[]
)
,(
"example4"
,
4
,
example4
)
,(
"example4"
,
4
,
example4
,
[]
)
,(
"example5"
,
5
,
example5
)
,(
"example5"
,
5
,
example5
,
[]
)
]
]
(
\
(
name
,
n
,
ex
)
->
do
(
\
(
name
,
n
,
ex
,
checks
)
->
do
b
<-
testEleve
False
n
ex
P
.
putStrLn
$
name
<>
" "
<>
show
n
P
.
putStrLn
$
name
<>
" "
<>
show
n
<>
" "
<>
if
b
then
"PASS"
else
"FAIL"
b
<-
testEleve
False
n
ex
checks
P
.
putStrLn
$
" splitting: "
<>
if
b
then
"PASS"
else
"FAIL"
)
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment