Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
f65383e8
Unverified
Commit
f65383e8
authored
Jun 11, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve: reverse, buildTrie, printTrie...
parent
f2de3b66
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
44 deletions
+28
-44
Eleve.hs
src/Gargantext/Text/Eleve.hs
+28
-44
No files found.
src/Gargantext/Text/Eleve.hs
View file @
f65383e8
...
...
@@ -106,24 +106,6 @@ parseToken "<start>" = Terminal Start
parseToken
"<stop>"
=
Terminal
Stop
parseToken
t
=
NonTerminal
t
-- |
-- >>> reverseTokens [Terminal Start, NonTerminal "new", NonTerminal "york", Terminal Stop]
-- [Terminal Start,NonTerminal "york",NonTerminal "new",Terminal Stop]
reverseTokens
::
[
Token
]
->
[
Token
]
reverseTokens
xs
=
case
lastMay
xs
of
Nothing
->
[]
Just
(
Terminal
Stop
)
->
reverseTokens'
xs
<>
[
Terminal
Stop
]
_
->
reverseTokens'
xs
reverseTokens'
::
[
Token
]
->
[
Token
]
reverseTokens'
[]
=
[]
reverseTokens'
[
Terminal
Stop
]
=
[]
reverseTokens'
[
x
]
=
[
x
]
reverseTokens'
(
x
:
xs
)
=
case
x
of
Terminal
Start
->
[
Terminal
Start
]
<>
reverseTokens'
xs
_
->
reverseTokens'
xs
<>
[
x
]
toToken
::
[
Text
]
->
[
Token
]
toToken
xs
=
Terminal
Start
:
(
NonTerminal
<$>
xs
)
<>
[
Terminal
Stop
]
...
...
@@ -213,10 +195,11 @@ nodeChildren (Leaf _) = Map.empty
class
IsTrie
trie
where
buildTrie
::
Entropy
e
=>
(
Int
->
[[
Text
]]
->
[[
Token
]])
->
Int
->
[[
Text
]]
->
trie
Token
e
buildTrie
::
Entropy
e
=>
[[
Token
]]
->
trie
Token
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
printTrie
::
(
Show
i
,
Entropy
e
)
=>
Getting
e
i
e
->
trie
Token
i
->
IO
()
normalizeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
trie
k
i
->
trie
k
o
...
...
@@ -226,7 +209,7 @@ class IsTrie trie where
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance
IsTrie
Trie
where
buildTrie
t
o
n
ts
=
entropyTrie
isTerminal
$
insertTries
$
to
n
ts
buildTrie
t
s
=
entropyTrie
isTerminal
$
insertTries
ts
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
_
(
Leaf
_
)
=
nan
...
...
@@ -236,10 +219,18 @@ instance IsTrie Trie where
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
printTrie
inE
t
=
do
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
$
toTree
(
NonTerminal
""
)
t
P
.
putStrLn
" Levels:"
forM_
(
normalizationLevels
inE
t
)
$
\
level
->
P
.
putStrLn
$
" "
<>
show
level
normalizeEntropy
inE
modE
t
=
go
(
modE
identity
)
(
normalizationLevels
inE
t
)
t
where
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
_
_
(
Leaf
c
)
=
Leaf
c
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
f
((
m
,
v
,
_
)
:
ess
)
(
Node
c
i
children
)
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
(
i
^.
inE
)
m
v
)
ess
<$>
children
...
...
@@ -269,7 +260,7 @@ levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
subForest
(
Node
_
_
children
)
=
Map
.
elems
children
entropyLevels
::
Entropy
e
=>
Getting
e
i
e
->
Trie
k
i
->
[[
e
]]
entropyLevels
inE
=
fmap
(
noNaNs
.
map
(
nodeEntropy
inE
))
.
levels
entropyLevels
inE
=
fmap
(
noNaNs
.
map
(
nodeEntropy
inE
))
.
L
.
tail
.
levels
normalizationLevels
::
Entropy
e
=>
Getting
e
i
e
->
Trie
k
i
->
[(
e
,
e
,
Int
)]
normalizationLevels
inE
=
fmap
f
.
entropyLevels
inE
...
...
@@ -288,9 +279,9 @@ makeLenses ''Tries
instance
IsTrie
Tries
where
buildTrie
t
o
n
tts
=
Tries
{
_fwd
=
buildTrie
to
n
tts
,
_bwd
=
buildTrie
to
n
(
map
reverse
$
tts
)
}
buildTrie
t
ts
=
Tries
{
_fwd
=
buildTrie
tts
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
}
nodeEntropy
inE
(
Tries
fwd
bwd
)
=
-- VETODO reverse the query for bwd here
...
...
@@ -308,6 +299,13 @@ instance IsTrie Tries where
normalizeEntropy
inE
modE
=
onTries
(
normalizeEntropy
inE
modE
)
printTrie
inE
(
Tries
fwd
bwd
)
=
do
P
.
putStrLn
"Forward:"
printTrie
inE
fwd
P
.
putStrLn
""
P
.
putStrLn
"Backward:"
printTrie
inE
bwd
onTries
::
(
Trie
k
i
->
Trie
k
o
)
->
Tries
k
i
->
Tries
k
o
onTries
f
(
Tries
fwd
bwd
)
=
Tries
(
f
fwd
)
(
f
bwd
)
...
...
@@ -382,14 +380,14 @@ entropy_var'' :: Entropy e => Getting e i e -> Tries Token i -> [Token] -> e
entropy_var''
inE
tries
ng
=
mean
$
noNaNs
[
fwd
,
bwd
]
where
fwd
=
(
nodeEntropy
inE
(
_fwd
$
findTrie
ng
tries
))
bwd
=
(
nodeEntropy
inE
(
_bwd
$
findTrie
(
reverse
Tokens
ng
)
tries
))
bwd
=
(
nodeEntropy
inE
(
_bwd
$
findTrie
(
reverse
ng
)
tries
))
---------------------------------------------
-- | TODO remove function below after following bug fixed
-- | TODO entropy_var' /= entropy_var on "<start> token.."
entropy_var'
::
Entropy
e
=>
Tries
Token
(
I
e
)
->
[
Token
]
->
e
entropy_var'
tries
ng
=
(
mean
$
noNaNs
[
(
nodeEntropy
info_entropy
(
_fwd
$
findTrie
ng
tries
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
(
reverse
Tokens
ng
)
tries
))
,
(
nodeEntropy
info_entropy
(
_bwd
$
findTrie
(
reverse
ng
)
tries
))
]
)
...
...
@@ -423,18 +421,10 @@ testEleve debug n output checks = do
P
.
putStrLn
(
show
input
)
-- forM_ pss (P.putStrLn . show)
P
.
putStrLn
""
P
.
putStrLn
"Forward:"
printTrie
(
_fwd
nt
)
P
.
putStrLn
""
P
.
putStrLn
"Backward:"
printTrie
(
_bwd
nt
)
P
.
putStrLn
""
P
.
putStrLn
"Levels:"
forM_
(
normalizationLevels
identity
t''
)
$
\
level
->
P
.
putStrLn
$
" "
<>
show
level
printTrie
info_entropy
nt
P
.
putStrLn
""
P
.
putStrLn
"Entropy Var:"
printTrie
t''
printTrie
identity
t''
P
.
putStrLn
""
P
.
putStrLn
"Splitting:"
P
.
putStrLn
$
show
res
...
...
@@ -448,7 +438,7 @@ testEleve debug n output checks = do
inp
=
toToken
<$>
input
t
::
Tries
Token
Double
t
=
buildTrie
toToken'
n
input
t
=
buildTrie
(
toToken'
n
input
)
&
bwd
.
node_children
.
at
(
Terminal
Start
)
.
_Just
.
node_entropy
.~
nan
-- TODO NP: this is a hack to set the bwd entropy of Start at NaN.
...
...
@@ -488,12 +478,6 @@ testEleve debug n output checks = do
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
identity
(
_fwd
t'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
identity
(
_bwd
t'
))
printTrie
::
Show
e
=>
Trie
Token
e
->
IO
()
printTrie
=
P
.
putStrLn
.
Tree
.
drawTree
.
fmap
show
.
toTree
(
NonTerminal
""
)
-- | TODO real data is a list of tokenized sentences
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
example0
=
[
"New-York is New-York and New-York"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment