Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f65383e8
Unverified
Commit
f65383e8
authored
Jun 11, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve: reverse, buildTrie, printTrie...
parent
f2de3b66
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
44 deletions
+28
-44
Eleve.hs
src/Gargantext/Text/Eleve.hs
+28
-44
No files found.
src/Gargantext/Text/Eleve.hs
View file @
f65383e8
...
@@ -106,24 +106,6 @@ parseToken "<start>" = Terminal Start
...
@@ -106,24 +106,6 @@ parseToken "<start>" = Terminal Start
parseToken
"<stop>"
=
Terminal
Stop
parseToken
"<stop>"
=
Terminal
Stop
parseToken
t
=
NonTerminal
t
parseToken
t
=
NonTerminal
t
-- |
-- >>> reverseTokens [Terminal Start, NonTerminal "new", NonTerminal "york", Terminal Stop]
-- [Terminal Start,NonTerminal "york",NonTerminal "new",Terminal Stop]
reverseTokens
::
[
Token
]
->
[
Token
]
reverseTokens
xs
=
case
lastMay
xs
of
Nothing
->
[]
Just
(
Terminal
Stop
)
->
reverseTokens'
xs
<>
[
Terminal
Stop
]
_
->
reverseTokens'
xs
reverseTokens'
::
[
Token
]
->
[
Token
]
reverseTokens'
[]
=
[]
reverseTokens'
[
Terminal
Stop
]
=
[]
reverseTokens'
[
x
]
=
[
x
]
reverseTokens'
(
x
:
xs
)
=
case
x
of
Terminal
Start
->
[
Terminal
Start
]
<>
reverseTokens'
xs
_
->
reverseTokens'
xs
<>
[
x
]
toToken
::
[
Text
]
->
[
Token
]
toToken
::
[
Text
]
->
[
Token
]
toToken
xs
=
Terminal
Start
:
(
NonTerminal
<$>
xs
)
<>
[
Terminal
Stop
]
toToken
xs
=
Terminal
Start
:
(
NonTerminal
<$>
xs
)
<>
[
Terminal
Stop
]
...
@@ -213,10 +195,11 @@ nodeChildren (Leaf _) = Map.empty
...
@@ -213,10 +195,11 @@ nodeChildren (Leaf _) = Map.empty
class
IsTrie
trie
where
class
IsTrie
trie
where
buildTrie
::
Entropy
e
=>
(
Int
->
[[
Text
]]
->
[[
Token
]])
->
Int
->
[[
Text
]]
->
trie
Token
e
buildTrie
::
Entropy
e
=>
[[
Token
]]
->
trie
Token
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
printTrie
::
(
Show
i
,
Entropy
e
)
=>
Getting
e
i
e
->
trie
Token
i
->
IO
()
normalizeEntropy
::
Entropy
e
normalizeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
trie
k
i
->
trie
k
o
->
trie
k
i
->
trie
k
o
...
@@ -226,7 +209,7 @@ class IsTrie trie where
...
@@ -226,7 +209,7 @@ class IsTrie trie where
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance
IsTrie
Trie
where
instance
IsTrie
Trie
where
buildTrie
t
o
n
ts
=
entropyTrie
isTerminal
$
insertTries
$
to
n
ts
buildTrie
t
s
=
entropyTrie
isTerminal
$
insertTries
ts
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
_
(
Leaf
_
)
=
nan
nodeEntropy
_
(
Leaf
_
)
=
nan
...
@@ -236,10 +219,18 @@ instance IsTrie Trie where
...
@@ -236,10 +219,18 @@ instance IsTrie Trie where
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
printTrie
inE
t
=
do
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
$
toTree
(
NonTerminal
""
)
t
P
.
putStrLn
" Levels:"
forM_
(
normalizationLevels
inE
t
)
$
\
level
->
P
.
putStrLn
$
" "
<>
show
level
normalizeEntropy
inE
modE
t
=
go
(
modE
identity
)
(
normalizationLevels
inE
t
)
t
normalizeEntropy
inE
modE
t
=
go
(
modE
identity
)
(
normalizationLevels
inE
t
)
t
where
where
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
_
_
(
Leaf
c
)
=
Leaf
c
go
_
_
(
Leaf
c
)
=
Leaf
c
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
f
((
m
,
v
,
_
)
:
ess
)
(
Node
c
i
children
)
go
f
((
m
,
v
,
_
)
:
ess
)
(
Node
c
i
children
)
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
(
i
^.
inE
)
m
v
)
ess
<$>
children
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
(
i
^.
inE
)
m
v
)
ess
<$>
children
...
@@ -269,7 +260,7 @@ levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
...
@@ -269,7 +260,7 @@ levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
subForest
(
Node
_
_
children
)
=
Map
.
elems
children
subForest
(
Node
_
_
children
)
=
Map
.
elems
children
entropyLevels
::
Entropy
e
=>
Getting
e
i
e
->
Trie
k
i
->
[[
e
]]
entropyLevels
::
Entropy
e
=>
Getting
e
i
e
->
Trie
k
i
->
[[
e
]]
entropyLevels
inE
=
fmap
(
noNaNs
.
map
(
nodeEntropy
inE
))
.
levels
entropyLevels
inE
=
fmap
(
noNaNs
.
map
(
nodeEntropy
inE
))
.
L
.
tail
.
levels
normalizationLevels
::
Entropy
e
=>
Getting
e
i
e
->
Trie
k
i
->
[(
e
,
e
,
Int
)]
normalizationLevels
::
Entropy
e
=>
Getting
e
i
e
->
Trie
k
i
->
[(
e
,
e
,
Int
)]
normalizationLevels
inE
=
fmap
f
.
entropyLevels
inE
normalizationLevels
inE
=
fmap
f
.
entropyLevels
inE
...
@@ -288,9 +279,9 @@ makeLenses ''Tries
...
@@ -288,9 +279,9 @@ makeLenses ''Tries
instance
IsTrie
Tries
where
instance
IsTrie
Tries
where
buildTrie
t
o
n
tts
=
Tries
{
_fwd
=
buildTrie
to
n
tts
buildTrie
t
ts
=
Tries
{
_fwd
=
buildTrie
tts
,
_bwd
=
buildTrie
to
n
(
map
reverse
$
tts
)
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
}
}
nodeEntropy
inE
(
Tries
fwd
bwd
)
=
nodeEntropy
inE
(
Tries
fwd
bwd
)
=
-- VETODO reverse the query for bwd here
-- VETODO reverse the query for bwd here
...
@@ -308,6 +299,13 @@ instance IsTrie Tries where
...
@@ -308,6 +299,13 @@ instance IsTrie Tries where
normalizeEntropy
inE
modE
=
onTries
(
normalizeEntropy
inE
modE
)
normalizeEntropy
inE
modE
=
onTries
(
normalizeEntropy
inE
modE
)
printTrie
inE
(
Tries
fwd
bwd
)
=
do
P
.
putStrLn
"Forward:"
printTrie
inE
fwd
P
.
putStrLn
""
P
.
putStrLn
"Backward:"
printTrie
inE
bwd
onTries
::
(
Trie
k
i
->
Trie
k
o
)
->
Tries
k
i
->
Tries
k
o
onTries
::
(
Trie
k
i
->
Trie
k
o
)
->
Tries
k
i
->
Tries
k
o
onTries
f
(
Tries
fwd
bwd
)
=
Tries
(
f
fwd
)
(
f
bwd
)
onTries
f
(
Tries
fwd
bwd
)
=
Tries
(
f
fwd
)
(
f
bwd
)
...
@@ -382,14 +380,14 @@ entropy_var'' :: Entropy e => Getting e i e -> Tries Token i -> [Token] -> e
...
@@ -382,14 +380,14 @@ entropy_var'' :: Entropy e => Getting e i e -> Tries Token i -> [Token] -> e
entropy_var''
inE
tries
ng
=
mean
$
noNaNs
[
fwd
,
bwd
]
entropy_var''
inE
tries
ng
=
mean
$
noNaNs
[
fwd
,
bwd
]
where
where
fwd
=
(
nodeEntropy
inE
(
_fwd
$
findTrie
ng
tries
))
fwd
=
(
nodeEntropy
inE
(
_fwd
$
findTrie
ng
tries
))
bwd
=
(
nodeEntropy
inE
(
_bwd
$
findTrie
(
reverse
Tokens
ng
)
tries
))
bwd
=
(
nodeEntropy
inE
(
_bwd
$
findTrie
(
reverse
ng
)
tries
))
---------------------------------------------
---------------------------------------------
-- | TODO remove function below after following bug fixed
-- | TODO remove function below after following bug fixed
-- | TODO entropy_var' /= entropy_var on "<start> token.."
-- | TODO entropy_var' /= entropy_var on "<start> token.."
entropy_var'
::
Entropy
e
=>
Tries
Token
(
I
e
)
->
[
Token
]
->
e
entropy_var'
::
Entropy
e
=>
Tries
Token
(
I
e
)
->
[
Token
]
->
e
entropy_var'
tries
ng
=
(
mean
$
noNaNs
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ng
tries
))
entropy_var'
tries
ng
=
(
mean
$
noNaNs
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ng
tries
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
(
reverse
Tokens
ng
)
tries
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
(
reverse
ng
)
tries
))
]
]
)
)
...
@@ -423,18 +421,10 @@ testEleve debug n output checks = do
...
@@ -423,18 +421,10 @@ testEleve debug n output checks = do
P
.
putStrLn
(
show
input
)
P
.
putStrLn
(
show
input
)
-- forM_ pss (P.putStrLn . show)
-- forM_ pss (P.putStrLn . show)
P
.
putStrLn
""
P
.
putStrLn
""
P
.
putStrLn
"Forward:"
printTrie
info_entropy
nt
printTrie
(
_fwd
nt
)
P
.
putStrLn
""
P
.
putStrLn
"Backward:"
printTrie
(
_bwd
nt
)
P
.
putStrLn
""
P
.
putStrLn
"Levels:"
forM_
(
normalizationLevels
identity
t''
)
$
\
level
->
P
.
putStrLn
$
" "
<>
show
level
P
.
putStrLn
""
P
.
putStrLn
""
P
.
putStrLn
"Entropy Var:"
P
.
putStrLn
"Entropy Var:"
printTrie
t''
printTrie
identity
t''
P
.
putStrLn
""
P
.
putStrLn
""
P
.
putStrLn
"Splitting:"
P
.
putStrLn
"Splitting:"
P
.
putStrLn
$
show
res
P
.
putStrLn
$
show
res
...
@@ -448,7 +438,7 @@ testEleve debug n output checks = do
...
@@ -448,7 +438,7 @@ testEleve debug n output checks = do
inp
=
toToken
<$>
input
inp
=
toToken
<$>
input
t
::
Tries
Token
Double
t
::
Tries
Token
Double
t
=
buildTrie
toToken'
n
input
t
=
buildTrie
(
toToken'
n
input
)
&
bwd
.
node_children
.
at
(
Terminal
Start
)
.
_Just
.
node_entropy
.~
nan
&
bwd
.
node_children
.
at
(
Terminal
Start
)
.
_Just
.
node_entropy
.~
nan
-- TODO NP: this is a hack to set the bwd entropy of Start at NaN.
-- TODO NP: this is a hack to set the bwd entropy of Start at NaN.
...
@@ -488,12 +478,6 @@ testEleve debug n output checks = do
...
@@ -488,12 +478,6 @@ testEleve debug n output checks = do
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
identity
(
_fwd
t'
))
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
identity
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
identity
(
_bwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
identity
(
_bwd
t'
))
printTrie
::
Show
e
=>
Trie
Token
e
->
IO
()
printTrie
=
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
.
toTree
(
NonTerminal
""
)
-- | TODO real data is a list of tokenized sentences
-- | TODO real data is a list of tokenized sentences
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
example0
=
[
"New-York is New-York and New-York"
]
example0
=
[
"New-York is New-York and New-York"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment