Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
8baf4227
Unverified
Commit
8baf4227
authored
Jun 11, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve many changes
parent
f65383e8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
112 additions
and
143 deletions
+112
-143
Eleve.hs
src/Gargantext/Text/Eleve.hs
+112
-143
No files found.
src/Gargantext/Text/Eleve.hs
View file @
8baf4227
...
@@ -44,8 +44,7 @@ module Gargantext.Text.Eleve where
...
@@ -44,8 +44,7 @@ module Gargantext.Text.Eleve where
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
-- import Debug.SimpleReflect
-- import Debug.SimpleReflect
import
Data.Functor.Reverse
import
Control.Lens
hiding
(
levels
,
children
)
import
Control.Lens
(
Lens
'
,
Getting
,
(
^.
),
(
^?
),
view
,
makeLenses
,
_Just
,
under
,
reversed
,
at
,
(
.~
),
to
,
set
)
import
Control.Monad
(
forM_
)
import
Control.Monad
(
forM_
)
import
Data.Ord
(
Ord
)
import
Data.Ord
(
Ord
)
import
qualified
Data.List
as
L
import
qualified
Data.List
as
L
...
@@ -56,11 +55,28 @@ import Data.Map (Map)
...
@@ -56,11 +55,28 @@ import Data.Map (Map)
import
Data.Maybe
(
fromMaybe
)
import
Data.Maybe
(
fromMaybe
)
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
import
Gargantext.Prelude
hiding
(
cs
)
import
Gargantext.Prelude
hiding
(
cs
)
import
qualified
Gargantext.Prelude
as
GP
import
qualified
Data.Tree
as
Tree
import
qualified
Data.Tree
as
Tree
import
Data.Tree
(
Tree
)
import
Data.Tree
(
Tree
)
import
qualified
Prelude
as
P
(
putStrLn
,
logBase
,
isNaN
,
RealFloat
)
import
qualified
Prelude
as
P
(
putStrLn
,
logBase
,
isNaN
,
RealFloat
)
nan
::
Floating
e
=>
e
nan
=
0
/
0
noNaNs
::
P
.
RealFloat
e
=>
[
e
]
->
[
e
]
noNaNs
=
filter
(
not
.
P
.
isNaN
)
updateIfDefined
::
P
.
RealFloat
e
=>
e
->
e
->
e
updateIfDefined
e0
e
|
P
.
isNaN
e
=
e0
|
otherwise
=
e
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
subst
::
Entropy
e
=>
(
e
,
e
)
->
e
->
e
subst
(
src
,
dst
)
x
|
sim
src
x
=
dst
|
otherwise
=
x
------------------------------------------------------------------------
type
Entropy
e
=
type
Entropy
e
=
(
Fractional
e
(
Fractional
e
,
Floating
e
,
Floating
e
...
@@ -77,18 +93,17 @@ data I e = I
...
@@ -77,18 +93,17 @@ data I e = I
}
}
instance
Show
e
=>
Show
(
I
e
)
where
instance
Show
e
=>
Show
(
I
e
)
where
show
(
I
e
v
n
)
=
show
(
e
,
v
,
n
)
show
(
I
e
ev
a
)
=
show
(
e
,
ev
,
a
)
makeLenses
''
I
makeLenses
''
I
type
ModEntropy
i
o
e
=
(
e
->
e
)
->
i
->
o
type
ModEntropy
i
o
e
=
(
e
->
e
)
->
i
->
o
set_autonomy
::
ModEntropy
e
(
I
e
)
e
set_autonomy
::
Entropy
e
=>
ModEntropy
e
(
I
e
)
e
set_autonomy
f
e
=
I
e
e
(
f
e
)
set_autonomy
f
e
=
I
e
nan
(
f
e
)
set_entropy_var
::
ModEntropy
e
(
I
e
)
e
set_entropy_var
f
e
=
I
e
(
f
e
)
e
set_entropy_var
::
Entropy
e
=>
Setter
e
(
I
e
)
e
e
set_entropy_var
f
e
=
(
\
ev
->
I
e
ev
nan
)
<$>
f
e
data
StartStop
=
Start
|
Stop
data
StartStop
=
Start
|
Stop
deriving
(
Ord
,
Eq
,
Show
)
deriving
(
Ord
,
Eq
,
Show
)
...
@@ -155,23 +170,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
...
@@ -155,23 +170,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
nan
::
Floating
e
=>
e
nan
=
0
/
0
noNaNs
::
P
.
RealFloat
e
=>
[
e
]
->
[
e
]
noNaNs
=
filter
(
not
.
P
.
isNaN
)
updateIfDefined
::
P
.
RealFloat
e
=>
e
->
e
->
e
updateIfDefined
e0
e
|
P
.
isNaN
e
=
e0
|
otherwise
=
e
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
subst
::
Entropy
e
=>
(
e
,
e
)
->
e
->
e
subst
(
src
,
dst
)
x
|
sim
src
x
=
dst
|
otherwise
=
x
entropyTrie
::
Entropy
e
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
entropyTrie
::
Entropy
e
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
entropyTrie
_
(
Leaf
c
)
=
Leaf
c
entropyTrie
_
(
Leaf
c
)
=
Leaf
c
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
...
@@ -182,8 +180,8 @@ entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) childre
...
@@ -182,8 +180,8 @@ entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) childre
where
where
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
------------------------------------------------------------------------
------------------------------------------------------------------------
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
->
e
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
normalizeLevel
prev
m
v
e
=
((
e
-
prev
)
-
m
)
/
v
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
{- Unused
{- Unused
...
@@ -199,7 +197,9 @@ class IsTrie trie where
...
@@ -199,7 +197,9 @@ class IsTrie trie where
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
findTrieR
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
printTrie
::
(
Show
i
,
Entropy
e
)
=>
Getting
e
i
e
->
trie
Token
i
->
IO
()
printTrie
::
(
Show
i
,
Entropy
e
)
=>
Getting
e
i
e
->
trie
Token
i
->
IO
()
evTrie
::
Entropy
e
=>
Getting
e
i
e
->
Setter
i
o
e
e
->
trie
k
i
->
trie
k
o
normalizeEntropy
::
Entropy
e
normalizeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
trie
k
i
->
trie
k
o
->
trie
k
i
->
trie
k
o
...
@@ -218,6 +218,7 @@ instance IsTrie Trie where
...
@@ -218,6 +218,7 @@ instance IsTrie Trie where
nodeChild
_
(
Leaf
_
)
=
emptyTrie
nodeChild
_
(
Leaf
_
)
=
emptyTrie
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
findTrieR
=
findTrie
printTrie
inE
t
=
do
printTrie
inE
t
=
do
P
.
putStrLn
.
Tree
.
drawTree
P
.
putStrLn
.
Tree
.
drawTree
...
@@ -227,29 +228,21 @@ instance IsTrie Trie where
...
@@ -227,29 +228,21 @@ instance IsTrie Trie where
forM_
(
normalizationLevels
inE
t
)
$
\
level
->
forM_
(
normalizationLevels
inE
t
)
$
\
level
->
P
.
putStrLn
$
" "
<>
show
level
P
.
putStrLn
$
" "
<>
show
level
evTrie
inE
setEV
=
go
nan
where
go
_
(
Leaf
c
)
=
Leaf
c
go
e0
(
Node
c
i
children
)
=
Node
c
(
i
&
setEV
.~
ev
e0
e1
)
$
go
e1
<$>
children
where
e1
=
i
^.
inE
ev
0
0
=
nan
ev
i0
i1
=
i1
-
i0
normalizeEntropy
inE
modE
t
=
go
(
modE
identity
)
(
normalizationLevels
inE
t
)
t
normalizeEntropy
inE
modE
t
=
go
(
modE
identity
)
(
normalizationLevels
inE
t
)
t
where
where
go
_
_
(
Leaf
c
)
=
Leaf
c
go
_
_
(
Leaf
c
)
=
Leaf
c
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
f
((
m
,
v
,
_
)
:
ess
)
(
Node
c
i
children
)
go
f
((
m
,
v
,
_
)
:
ess
)
(
Node
c
i
children
)
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
(
i
^.
inE
)
m
v
)
ess
<$>
children
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
m
v
)
ess
<$>
children
{-
This is only normalizing a node with respect to its brothers (unlike all the
nodes of the same level).
normalizeEntropy inE modE = go $ modE identity
where
go _ (Leaf c) = Leaf c
go f (Node c i children)
| Map.null children =
panic "normalizeEntropy: impossible"
| otherwise =
Node c (f i) $ go (modE $ normalizeLevel es) <$> children
where
es = [ i' ^. inE | Node _ i' _ <- Map.elems children ]
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
levels
::
Trie
k
e
->
[[
Trie
k
e
]]
levels
::
Trie
k
e
->
[[
Trie
k
e
]]
...
@@ -276,38 +269,39 @@ data Tries k e = Tries
...
@@ -276,38 +269,39 @@ data Tries k e = Tries
makeLenses
''
T
ries
makeLenses
''
T
ries
nodeEntropySafe
::
Entropy
e
=>
Getting
e
i
e
->
Tries
k
i
->
e
nodeEntropySafe
inE
(
Tries
f
b
)
=
mean
$
noNaNs
[
nodeEntropy
inE
f
,
nodeEntropy
inE
b
]
nodeEntropyBwdOpt
::
Entropy
e
=>
Getting
e
i
e
->
Tries
k
i
->
e
nodeEntropyBwdOpt
inE
(
Tries
f
b
)
=
mean
$
nodeEntropy
inE
f
:
noNaNs
[
nodeEntropy
inE
b
]
instance
IsTrie
Tries
where
instance
IsTrie
Tries
where
buildTrie
tts
=
Tries
{
_fwd
=
buildTrie
tts
buildTrie
tts
=
Tries
{
_fwd
=
buildTrie
tts
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
}
}
nodeEntropy
inE
(
Tries
f
wd
bwd
)
=
nodeEntropy
inE
(
Tries
f
b
)
=
mean
[
nodeEntropy
inE
f
,
nodeEntropy
inE
b
]
-- VETODO reverse the query for bwd here
-- mean $ noNaNs [nodeEntropy inE fwd, nodeEntropy inE bwd . under reversed]
findTrie
ks
=
onTries
(
findTrie
ks
)
mean
$
noNaNs
[
nodeEntropy
inE
fwd
,
nodeEntropy
inE
bwd
]
findTrieR
ks
(
Tries
f
b
)
=
Tries
(
findTrieR
ks
f
)
(
findTrieR
(
reverse
ks
)
b
)
findTrie
ks
(
Tries
fwd
bwd
)
=
Tries
(
findTrie
ks
fwd
)
(
findTrie
ks
bwd
)
nodeChild
=
onTries
.
nodeChild
-- ^^
-- TODO: here this is tempting to reverse but this is not always what we
-- want. See also nodeAutonomy.
-- AD: I also tried to reverse here and I confirm getting unexpected results (whereas VETODO FIX below is ok)
-- since recursivity of the function makes the reverse multiple times (I guess)
nodeChild
k
(
Tries
fwd
bwd
)
=
Tries
(
nodeChild
k
fwd
)
(
nodeChild
k
bwd
)
evTrie
inE
setEV
=
onTries
$
evTrie
inE
setEV
normalizeEntropy
inE
modE
=
onTries
(
normalizeEntropy
inE
modE
)
normalizeEntropy
inE
=
onTries
.
normalizeEntropy
inE
printTrie
inE
(
Tries
f
wd
bwd
)
=
do
printTrie
inE
(
Tries
f
b
)
=
do
P
.
putStrLn
"Forward:"
P
.
putStrLn
"Forward:"
printTrie
inE
f
wd
printTrie
inE
f
P
.
putStrLn
""
P
.
putStrLn
""
P
.
putStrLn
"Backward:"
P
.
putStrLn
"Backward:"
printTrie
inE
b
wd
printTrie
inE
b
onTries
::
(
Trie
k
i
->
Trie
k
o
)
->
Tries
k
i
->
Tries
k
o
onTries
::
(
Trie
k
i
->
Trie
k
o
)
->
Tries
k
i
->
Tries
k
o
onTries
f
(
Tries
fwd
bwd
)
=
Tries
(
f
fwd
)
(
f
bwd
)
onTries
h
(
Tries
f
b
)
=
Tries
(
h
f
)
(
h
b
)
------------------------------------------------------------------------
------------------------------------------------------------------------
split
::
(
IsTrie
trie
,
Entropy
e
)
=>
Lens'
i
e
->
trie
Token
i
->
[
Token
]
->
[[
Token
]]
split
::
(
IsTrie
trie
,
Entropy
e
)
=>
Lens'
i
e
->
trie
Token
i
->
[
Token
]
->
[[
Token
]]
...
@@ -366,43 +360,23 @@ toToken' :: Int -> [[Text]] -> [[Token]]
...
@@ -366,43 +360,23 @@ toToken' :: Int -> [[Text]] -> [[Token]]
toToken'
n
input
=
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
<$>
toToken
<$>
input
toToken'
n
input
=
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
<$>
toToken
<$>
input
---------------------------------------------
---------------------------------------------
{-
set_entropy_vars :: Entropy e => Getting e i e -> (e -> i -> o) -> Tries Token i -> Trie Token o
set_entropy_vars :: Entropy e => Getting e i e -> (e -> i -> o) -> Tries Token i -> Trie Token o
set_entropy_vars inE modE tries@(Tries fwd _bwd) =
set_entropy_vars inE modE tries@(Tries fwd _bwd) =
mapTree
(
\
k
->
modE
$
entropy_var''
inE
tries
k
)
[]
fwd
mapTree (\k -> modE $
nodeEntropy inE (findTrieR k tries)
) [] fwd
mapTree :: ([Token] -> t -> e) -> [Token] -> Trie Token t -> Trie Token e
mapTree :: ([Token] -> t -> e) -> [Token] -> Trie Token t -> Trie Token e
mapTree f k t = go f k t
mapTree f k t = go f k t
where
where
go _ _ (Leaf c) = Leaf c
go _ _ (Leaf c) = Leaf c
go f k (Node c i children) = Node c (f k i) (Map.mapWithKey (\k'-> go f (k <> [k'])) children)
go f k (Node c i children) = Node c (f k i) (Map.mapWithKey (\k'-> go f (k <> [k'])) children)
-}
entropy_var''
::
Entropy
e
=>
Getting
e
i
e
->
Tries
Token
i
->
[
Token
]
->
e
entropy_var''
inE
tries
ng
=
mean
$
noNaNs
[
fwd
,
bwd
]
where
fwd
=
(
nodeEntropy
inE
(
_fwd
$
findTrie
ng
tries
))
bwd
=
(
nodeEntropy
inE
(
_bwd
$
findTrie
(
reverse
ng
)
tries
))
---------------------------------------------
---------------------------------------------
-- | TODO remove function below after following bug fixed
-- | TODO entropy_var' /= entropy_var on "<start> token.."
entropy_var'
::
Entropy
e
=>
Tries
Token
(
I
e
)
->
[
Token
]
->
e
entropy_var'
tries
ng
=
(
mean
$
noNaNs
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ng
tries
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
(
reverse
ng
)
tries
))
]
)
entropy_var
::
Entropy
e
=>
[
Text
]
->
Tries
Token
(
I
e
)
->
e
entropy_var
ng
trie
=
(
mean
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ntf
trie
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
ntb
trie
))
]
)
where
ntf
=
parseToken
<$>
ng
ntb
=
parseToken
<$>
reverse
ng
---------------------------------------------
type
Checks
e
=
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
,
e
,
e
,
e
,
e
)]
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
)]
->
IO
Bool
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
Checks
e
->
IO
Bool
testEleve
debug
n
output
checks
=
do
testEleve
debug
n
output
checks
=
do
let
let
{-
{-
...
@@ -422,9 +396,9 @@ testEleve debug n output checks = do
...
@@ -422,9 +396,9 @@ testEleve debug n output checks = do
-- forM_ pss (P.putStrLn . show)
-- forM_ pss (P.putStrLn . show)
P
.
putStrLn
""
P
.
putStrLn
""
printTrie
info_entropy
nt
printTrie
info_entropy
nt
P
.
putStrLn
""
--
P.putStrLn ""
P
.
putStrLn
"Entropy Var:"
--
P.putStrLn "Entropy Var:"
printTrie
identity
t''
--
printTrie identity t''
P
.
putStrLn
""
P
.
putStrLn
""
P
.
putStrLn
"Splitting:"
P
.
putStrLn
"Splitting:"
P
.
putStrLn
$
show
res
P
.
putStrLn
$
show
res
...
@@ -438,45 +412,43 @@ testEleve debug n output checks = do
...
@@ -438,45 +412,43 @@ testEleve debug n output checks = do
inp
=
toToken
<$>
input
inp
=
toToken
<$>
input
t
::
Tries
Token
Double
t
::
Tries
Token
Double
t
=
buildTrie
(
toToken'
n
input
)
t
=
buildTrie
(
toToken'
n
input
)
&
bwd
.
node_children
.
at
(
Terminal
Start
)
.
_Just
.
node_entropy
.~
nan
-- TODO NP: this is a hack to set the bwd entropy of Start at NaN.
evt
::
Tries
Token
(
I
Double
)
evt
=
evTrie
identity
set_entropy_var
t
nt
::
Tries
Token
(
I
Double
)
nt
::
Tries
Token
(
I
Double
)
nt
=
normalizeEntropy
i
dentity
set_autonomy
t
nt
=
normalizeEntropy
i
nfo_entropy_var
(
\
fe
i
->
i
&
info_autonomy
.~
fe
(
i
^.
info_entropy_var
))
ev
t
t''
::
Trie
Token
Double
--
t'' :: Trie Token Double
t''
=
set_entropy_vars
info_autonomy
(
\
e
_i
->
e
)
nt
--
t'' = set_entropy_vars info_autonomy (\e _i -> e) nt
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
check
f
msg
ref
my
=
check
f
msg
ref
my
=
if
f
ref
my
if
f
ref
my
then
P
.
putStrLn
$
"
PASS
"
<>
msg
<>
" "
<>
show
ref
then
P
.
putStrLn
$
"
\ESC
[32mPASS
\ESC
[m
"
<>
msg
<>
" "
<>
show
ref
else
P
.
putStrLn
$
"
FAIL
"
<>
msg
<>
" ref="
<>
show
ref
<>
" my="
<>
show
my
else
P
.
putStrLn
$
"
\ESC
[31mFAIL
\ESC
[m
"
<>
msg
<>
" ref="
<>
show
ref
<>
" my="
<>
show
my
checker
(
ngram
,
count
,
entropy
,
_ev
,
autonomy
,
bwd_entropy
,
fwd_entrop
y
)
=
do
checker
(
ngram
,
count
,
entropy
,
ev
,
autonomy
,
fwd_entropy
,
fwd_ev
,
fwd_autonomy
,
bwd_entropy
,
bwd_ev
,
bwd_autonom
y
)
=
do
let
ns
=
parseToken
<$>
T
.
words
ngram
let
ns
=
parseToken
<$>
T
.
words
ngram
nsb
=
parseToken
<$>
(
reverse
$
T
.
words
ngram
)
t'
=
findTrie
ns
t
t'
=
findTrie
ns
t
tvar
=
findTrie
ns
t''
-- tvar = findTrie ns t''
-- my_entropy_var = nodeEntropy identity tvar
nt'
=
findTrie
ns
nt
nt'
=
findTrie
ns
nt
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
tvar
)
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy_var"
entropy
(
nodeEntropy
identity
tvar
)
check
sim
"entropy"
entropy
(
nodeEntropyBwdOpt
info_entropy
nt'
)
--check sim ("entropy_varOK") entropy (entropy_var (T.words ngram) nt)
check
sim
"ev"
ev
(
nodeEntropy
info_entropy_var
nt'
)
--check sim "entropy" entropy (entropy_var' nt (parseToken <$> T.words ngram))
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
nt'
)
{- ^ FIXME 2 fun above should have same results (error in reverseToken):
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
nt'
))
<start> New York:
check
sim
"fwd_ev"
fwd_ev
(
nodeEntropy
info_entropy_var
(
_fwd
nt'
))
PASS count 1
check
sim
"fwd_autonomy"
fwd_autonomy
(
nodeEntropy
info_autonomy
(
_fwd
nt'
))
FAIL entropy ref=NaN my=0.0
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
identity
(
_bwd
t'
))
-}
check
sim
"bwd_ev"
bwd_ev
(
nodeEntropy
info_entropy_var
(
_bwd
nt'
))
check
sim
"bwd_autonomy"
bwd_autonomy
(
nodeEntropy
info_autonomy
(
_bwd
nt'
))
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
nt'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
identity
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
identity
(
_bwd
t'
))
-- | TODO real data is a list of tokenized sentences
-- | TODO real data is a list of tokenized sentences
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
...
@@ -493,40 +465,37 @@ example6 = ["le-petit chat"
...
@@ -493,40 +465,37 @@ example6 = ["le-petit chat"
,
"le gros rat"
,
"le gros rat"
]
]
checks0
,
checks2
::
[(
Text
,
Int
,
Double
,
Double
,
Double
,
Double
,
Double
)]
checks0
,
checks2
::
Checks
Double
checks0
=
checks0
=
[(
"<start>"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
[
(
"<start>"
,
1
,
nan
,
nan
,
nan
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
nan
,
nan
,
nan
)
,(
"New"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
1.584962500721156
,
0.0
)
,
(
"New"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
1.584962500721156
,
-
0.5283208335737188
,
2.0
)
,(
"York"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
0.0
,
1.584962500721156
)
,
(
"York"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
1.584962500721156
,
-
0.5283208335737188
,
2.0
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
)
,(
"is"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,
(
"is"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
)
,(
"and"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,
(
"and"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
)
--,("<stop>", 0, nan, nan, nan, 0.0, nan) Since it is not in the trie it no,
--, ("<stop>", 0.0, nan, nan, nan, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002)
-- need to count it.
-- Since it is not in the trie it no, need to count it.
,
(
"<start> New"
,
1
,
nan
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
--{-
,
(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
nan
,
nan
,
nan
)
,(
"<start> New"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,
(
"York is"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
-
1.584962500721156
,
-
0.7071067811865474
,
nan
,
nan
,
nan
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
nan
,
1.584962500721156
)
,
(
"is New"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"York and"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
-
1.584962500721156
,
-
0.7071067811865474
,
nan
,
nan
,
nan
)
,(
"is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"and New"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"<start> New York"
,
1
,
nan
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)
,
(
"New York is"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
-
1.584962500721156
,
nan
,
nan
,
nan
,
nan
)
,
(
"York is New"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"<start> New York"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,
(
"is New York"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"New York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York and"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
-
1.584962500721156
,
nan
,
nan
,
nan
,
nan
)
,(
"York is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"York and New"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"is New York"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"and New York"
,
1
,
0.0
,
nan
,
nan
,
0.0
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"New York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
,
nan
)
,(
"York and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"and New York"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)
--}
]
]
checks2
=
checks2
=
[]
{-
[("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
[("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
,("be or", 2, 0.5, nan, nan, nan, 1.0)
,("be or", 2, 0.5, nan, nan, nan, 1.0)
,("or not", 1, 0.0, nan, nan, nan, 0.0)
,("or not", 1, 0.0, nan, nan, nan, 0.0)
...
@@ -535,7 +504,7 @@ checks2 =
...
@@ -535,7 +504,7 @@ checks2 =
,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
,("be and", 1, 0.0, nan, nan, nan, 0.0)
,("be and", 1, 0.0, nan, nan, nan, 0.0)
]
]
-}
runTests
::
IO
()
runTests
::
IO
()
runTests
=
runTests
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment