Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
fed5f0b8
Commit
fed5f0b8
authored
Jun 07, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX][Ngrams] order of tokens.
parent
71dce4c8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
9 deletions
+18
-9
Eleve.hs
src/Gargantext/Text/Eleve.hs
+18
-9
No files found.
src/Gargantext/Text/Eleve.hs
View file @
fed5f0b8
...
@@ -170,7 +170,7 @@ entropyTrie _ (Leaf c) = Leaf c
...
@@ -170,7 +170,7 @@ entropyTrie _ (Leaf c) = Leaf c
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
where
where
e
=
sum
$
map
f
$
Map
.
toList
children
e
=
sum
$
map
f
$
Map
.
toList
children
f
(
k
,
child
)
=
if
pred
k
then
chc
*
P
.
logBase
2
(
fromIntegral
c
)
f
(
k
,
child
)
=
if
pred
k
then
chc
*
P
.
logBase
2
(
fromIntegral
c
)
else
-
chc
*
P
.
logBase
2
chc
else
-
chc
*
P
.
logBase
2
chc
where
where
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
...
@@ -204,11 +204,13 @@ nodeChildren (Leaf _) = Map.empty
...
@@ -204,11 +204,13 @@ nodeChildren (Leaf _) = Map.empty
-}
-}
data
Ward
=
ForWard
|
BackWard
class
IsTrie
trie
where
class
IsTrie
trie
where
buildTrie
::
Entropy
e
=>
[[
Token
]]
->
trie
Token
e
buildTrie
::
Entropy
e
=>
(
Int
->
[[
Text
]]
->
[[
Token
]])
->
Int
->
[[
Text
]]
->
trie
Token
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
normalizeEntropy
::
Entropy
e
normalizeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
trie
k
i
->
trie
k
o
->
trie
k
i
->
trie
k
o
...
@@ -218,7 +220,7 @@ class IsTrie trie where
...
@@ -218,7 +220,7 @@ class IsTrie trie where
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance
IsTrie
Trie
where
instance
IsTrie
Trie
where
buildTrie
=
entropyTrie
isTerminal
.
insertTrie
s
buildTrie
to
n
ts
=
entropyTrie
isTerminal
$
insertTries
$
to
n
t
s
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
_
(
Leaf
_
)
=
-- trace "nodeEntropy of Leaf" $
nodeEntropy
_
(
Leaf
_
)
=
-- trace "nodeEntropy of Leaf" $
...
@@ -275,10 +277,15 @@ data Tries k e = Tries
...
@@ -275,10 +277,15 @@ data Tries k e = Tries
,
_bwd
::
Trie
k
e
,
_bwd
::
Trie
k
e
}
}
toToken'
::
Int
->
[[
Text
]]
->
[[
Token
]]
toToken'
n
input
=
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
<$>
toToken
<$>
input
instance
IsTrie
Tries
where
instance
IsTrie
Tries
where
buildTrie
t
ts
=
Tries
{
_fwd
=
buildTrie
tts
buildTrie
t
o
n
tts
=
Tries
{
_fwd
=
buildTrie
to
n
tts
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
,
_bwd
=
buildTrie
to
n
(
map
reverse
$
tts
)
}
}
nodeEntropy
inE
(
Tries
fwd
bwd
)
=
nodeEntropy
inE
(
Tries
fwd
bwd
)
=
mean
$
noNaNs
[
nodeEntropy
inE
fwd
,
nodeEntropy
inE
bwd
]
mean
$
noNaNs
[
nodeEntropy
inE
fwd
,
nodeEntropy
inE
bwd
]
...
@@ -390,7 +397,7 @@ testEleve debug n output checks = do
...
@@ -390,7 +397,7 @@ testEleve debug n output checks = do
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
<$>
input
inp
=
toToken
<$>
input
t
=
buildTrie
$
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
<$>
inp
t
=
buildTrie
toToken'
n
input
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
set_autonomy
t
nt
=
normalizeEntropy
identity
set_autonomy
t
...
@@ -440,6 +447,7 @@ checks0 =
...
@@ -440,6 +447,7 @@ checks0 =
,(
"and"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,(
"and"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,(
"<stop>"
,
0
,
nan
,
nan
,
nan
,
0.0
,
nan
)
,(
"<stop>"
,
0
,
nan
,
nan
,
nan
,
0.0
,
nan
)
{-
,("<start> New", 1, nan, nan, nan, nan, 0.0)
,("<start> New", 1, nan, nan, nan, nan, 0.0)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.4142135623730951, nan, 1.584962500721156)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.4142135623730951, nan, 1.584962500721156)
,("York is", 1, 0.0, nan, nan, nan, 0.0)
,("York is", 1, 0.0, nan, nan, nan, 0.0)
...
@@ -456,6 +464,7 @@ checks0 =
...
@@ -456,6 +464,7 @@ checks0 =
,("York and New", 1, 0.0, nan, nan, nan, 0.0)
,("York and New", 1, 0.0, nan, nan, nan, 0.0)
,("and New York", 1, 0.0, nan, nan, nan, 0.0)
,("and New York", 1, 0.0, nan, nan, nan, 0.0)
,("New York <stop>", 1, nan, nan, nan, nan, nan)
,("New York <stop>", 1, nan, nan, nan, nan, nan)
-}
]
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment