Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
00787609
Commit
00787609
authored
Jun 11, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Ngrams] Entropy Variation OK in tests.
parent
86d4a8dd
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
100 additions
and
20 deletions
+100
-20
Eleve.hs
src/Gargantext/Text/Eleve.hs
+100
-20
No files found.
src/Gargantext/Text/Eleve.hs
View file @
00787609
...
...
@@ -45,7 +45,7 @@ import Debug.Trace (trace)
-- import Debug.SimpleReflect
import
Data.Functor.Reverse
import
Control.Lens
(
Lens
'
,
Getting
,
(
^.
),
(
^?
),
view
,
makeLenses
,
_Just
,
under
,
reversed
,
at
,
(
.~
))
import
Control.Lens
(
Lens
'
,
Getting
,
(
^.
),
(
^?
),
view
,
makeLenses
,
_Just
,
under
,
reversed
,
at
,
(
.~
)
,
to
,
set
)
import
Control.Monad
(
forM_
)
import
Data.Ord
(
Ord
)
import
qualified
Data.List
as
L
...
...
@@ -56,6 +56,7 @@ import Data.Map (Map)
import
Data.Maybe
(
fromMaybe
)
import
qualified
Data.Map
as
Map
import
Gargantext.Prelude
hiding
(
cs
)
import
qualified
Gargantext.Prelude
as
GP
import
qualified
Data.Tree
as
Tree
import
Data.Tree
(
Tree
)
import
qualified
Prelude
as
P
(
putStrLn
,
logBase
,
isNaN
,
RealFloat
)
...
...
@@ -70,19 +71,24 @@ type Entropy e =
------------------------------------------------------------------------
-- | Example and tests for development
data
I
e
=
I
{
_info_entropy
::
e
,
_info_autonomy
::
e
{
_info_entropy
::
e
,
_info_entropy_var
::
e
,
_info_autonomy
::
e
}
instance
Show
e
=>
Show
(
I
e
)
where
show
(
I
e
n
)
=
show
(
e
,
n
)
show
(
I
e
v
n
)
=
show
(
e
,
v
,
n
)
makeLenses
''
I
type
ModEntropy
i
o
e
=
(
e
->
e
)
->
i
->
o
set_autonomy
::
ModEntropy
e
(
I
e
)
e
set_autonomy
f
e
=
I
e
(
f
e
)
set_autonomy
f
e
=
I
e
e
(
f
e
)
set_entropy_var
::
ModEntropy
e
(
I
e
)
e
set_entropy_var
f
e
=
I
e
(
f
e
)
e
data
StartStop
=
Start
|
Stop
deriving
(
Ord
,
Eq
,
Show
)
...
...
@@ -91,6 +97,9 @@ data Token = NonTerminal Text
|
Terminal
StartStop
deriving
(
Ord
,
Eq
,
Show
)
rootTrie
::
Token
rootTrie
=
NonTerminal
""
isTerminal
::
Token
->
Bool
isTerminal
(
Terminal
_
)
=
True
isTerminal
(
NonTerminal
_
)
=
False
...
...
@@ -100,6 +109,23 @@ parseToken "<start>" = Terminal Start
parseToken
"<stop>"
=
Terminal
Stop
parseToken
t
=
NonTerminal
t
-- |
-- >>> reverseTokens [Terminal Start, NonTerminal "new", NonTerminal "york", Terminal Stop]
-- [Terminal Start,NonTerminal "york",NonTerminal "new",Terminal Stop]
reverseTokens
::
[
Token
]
->
[
Token
]
reverseTokens
xs
=
case
lastMay
xs
of
Nothing
->
[]
Just
(
Terminal
Stop
)
->
reverseTokens'
xs
<>
[
Terminal
Stop
]
_
->
reverseTokens'
xs
reverseTokens'
::
[
Token
]
->
[
Token
]
reverseTokens'
[]
=
[]
reverseTokens'
[
Terminal
Stop
]
=
[]
reverseTokens'
[
x
]
=
[
x
]
reverseTokens'
(
x
:
xs
)
=
case
x
of
Terminal
Start
->
[
Terminal
Start
]
<>
reverseTokens'
xs
_
->
reverseTokens'
xs
<>
[
x
]
toToken
::
[
Text
]
->
[
Token
]
toToken
xs
=
Terminal
Start
:
(
NonTerminal
<$>
xs
)
<>
[
Terminal
Stop
]
...
...
@@ -110,7 +136,6 @@ printToken = f
f
(
NonTerminal
x
)
=
x
f
(
Terminal
Start
)
=
"<start>"
f
(
Terminal
Stop
)
=
"<stop>"
------------------------------------------------------------------------
data
Trie
k
e
...
...
@@ -144,7 +169,6 @@ mkTrie c children
|
otherwise
=
Node
c
mempty
children
-----------------------------
-- | Trie to Tree since Tree as nice print function
toTree
::
k
->
Trie
k
e
->
Tree
(
k
,
Int
,
Maybe
e
)
toTree
k
(
Leaf
c
)
=
Tree
.
Node
(
k
,
c
,
Nothing
)
[]
...
...
@@ -152,7 +176,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
------------------------------------------------------------------------
------------------------------------------------------------------------
nan
::
Floating
e
=>
e
nan
=
0
/
0
...
...
@@ -180,7 +203,6 @@ entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) childre
where
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
------------------------------------------------------------------------
normalizeLevel
::
Entropy
e
=>
[
e
]
->
e
->
e
normalizeLevel
=
checkDiff
(
go
.
noNaNs
)
...
...
@@ -234,8 +256,9 @@ instance IsTrie Trie where
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
normalizeEntropy
inE
modE
t
=
go
(
modE
identity
)
(
entropyLevels
inE
t
)
t
normalizeEntropy
inE
modE
t
=
trace
(
show
level
)
$
go
(
modE
identity
)
level
t
where
level
=
(
entropyLevels
inE
t
)
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
_
_
(
Leaf
c
)
=
Leaf
c
-- go _ ([] : _) _ = panic "normalizeEntropy': empty level"
...
...
@@ -282,6 +305,8 @@ data Tries k e = Tries
makeLenses
''
T
ries
instance
IsTrie
Tries
where
buildTrie
to
n
tts
=
Tries
{
_fwd
=
buildTrie
to
n
tts
,
_bwd
=
buildTrie
to
n
(
map
reverse
$
tts
)
...
...
@@ -362,6 +387,43 @@ chunkAlongEleve n xs = L.take n <$> L.tails xs
toToken'
::
Int
->
[[
Text
]]
->
[[
Token
]]
toToken'
n
input
=
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
<$>
toToken
<$>
input
---------------------------------------------
set_entropy_vars
::
Entropy
e
=>
Getting
e
i
e
->
(
e
->
i
->
o
)
->
Tries
Token
i
->
Trie
Token
o
set_entropy_vars
inE
modE
tries
@
(
Tries
fwd
_bwd
)
=
mapTree
(
\
k
->
modE
$
entropy_var''
inE
tries
k
)
[
rootTrie
]
fwd
mapTree
::
([
Token
]
->
t
->
e
)
->
[
Token
]
->
Trie
Token
t
->
Trie
Token
e
mapTree
f
k
t
=
go
f
k
t
where
go
_
_
(
Leaf
c
)
=
Leaf
c
go
f
k
(
Node
c
i
children
)
=
Node
c
(
f
k
i
)
(
Map
.
mapWithKey
(
\
k'
->
go
f
(
filter
(
/=
rootTrie
)
$
k
<>
[
k'
]))
children
)
entropy_var''
::
Entropy
e
=>
Getting
e
i
e
->
Tries
Token
i
->
[
Token
]
->
e
entropy_var''
inE
tries
ng
=
mean
$
noNaNs
[
fwd
,
bwd
]
where
fwd
=
(
nodeEntropy
inE
(
_fwd
$
findTrie
ng
tries
))
bwd
=
(
nodeEntropy
inE
(
_bwd
$
findTrie
(
reverseTokens
ng
)
tries
))
---------------------------------------------
-- | TODO remove function below after following bug fixed
-- | TODO entropy_var' /= entropy_var on "<start> token.."
entropy_var'
::
Entropy
e
=>
Tries
Token
(
I
e
)
->
[
Token
]
->
e
entropy_var'
tries
ng
=
(
mean
$
noNaNs
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ng
tries
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
(
reverseTokens
ng
)
tries
))
]
)
entropy_var
::
Entropy
e
=>
[
Text
]
->
Tries
Token
(
I
e
)
->
e
entropy_var
ng
trie
=
(
mean
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ntf
trie
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
ntb
trie
))
]
)
where
ntf
=
parseToken
<$>
ng
ntb
=
parseToken
<$>
reverse
ng
---------------------------------------------
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
)]
->
IO
Bool
testEleve
debug
n
output
checks
=
do
let
...
...
@@ -401,12 +463,25 @@ testEleve debug n output checks = do
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
<$>
input
t
::
Tries
Token
Double
t
=
buildTrie
toToken'
n
input
&
bwd
.
node_children
.
at
(
Terminal
Start
)
.
_Just
.
node_entropy
.~
nan
-- NP: this is a hack to set the bwd entropy of Start at NaN.
t''
::
Trie
Token
Double
t''
=
set_entropy_vars
identity
(
\
e
_i
->
e
)
t
-- keeping nt for fwd and bwd checks
-- it has no sense to calculate entropy_var on fwd and bwd each
nt
::
Tries
Token
(
I
Double
)
nt
=
normalizeEntropy
identity
set_autonomy
t
nt'
::
Trie
Token
(
I
Double
)
nt'
=
normalizeEntropy
identity
set_autonomy
t''
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
set_autonomy
t
check
f
msg
ref
my
=
if
f
ref
my
...
...
@@ -416,23 +491,28 @@ testEleve debug n output checks = do
checker
(
ngram
,
count
,
entropy
,
_ev
,
autonomy
,
bwd_entropy
,
fwd_entropy
)
=
do
let
ns
=
parseToken
<$>
T
.
words
ngram
nsb
=
parseToken
<$>
(
reverse
$
T
.
words
ngram
)
t'
=
findTrie
ns
nt
tb'
=
findTrie
nsb
nt
-- TODO put this Variation Entropy at VETODO mark above maybe in nodeEntropy ?
ev
=
(
mean
[(
nodeEntropy
info_entropy
(
_fwd
t'
)),
(
nodeEntropy
info_entropy
(
_bwd
tb'
))])
tvar
=
findTrie
ns
t''
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy"
entropy
ev
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
t'
)
check
(
==
)
"count"
count
(
_node_count
tvar
)
check
sim
"entropy_var"
entropy
(
nodeEntropy
identity
tvar
)
--check sim ("entropy_varOK") entropy (entropy_var (T.words ngram) nt)
--check sim "entropy" entropy (entropy_var' nt (parseToken <$> T.words ngram))
{- ^ FIXME 2 fun above should have same results (error in reverseToken):
<start> New York:
PASS count 1
FAIL entropy ref=NaN my=0.0
-}
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
nt'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
info_entropy
(
_bwd
t'
))
printTrie
=
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
.
toTree
(
NonTerminal
""
)
.
toTree
rootTrie
-- | TODO real data is a list of tokenized sentences
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
...
...
@@ -460,7 +540,7 @@ checks0 =
--,("<stop>", 0, nan, nan, nan, 0.0, nan) Since it is not in the trie it no,
-- need to count it.
--
{-
{-
,("<start> New", 1, nan, nan, nan, nan, 0.0)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.4142135623730951, nan, 1.584962500721156)
,("York is", 1, 0.0, nan, nan, nan, 0.0)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment