Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
eea6fe3e
Commit
eea6fe3e
authored
Jun 12, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Revert "[FIX] bwd_entropy : bwd tokens."
This reverts commit
a311ae32
.
parent
a311ae32
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
46 additions
and
39 deletions
+46
-39
Eleve.hs
src/Gargantext/Text/Eleve.hs
+46
-39
No files found.
src/Gargantext/Text/Eleve.hs
View file @
eea6fe3e
...
...
@@ -20,7 +20,9 @@ References:
, pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075)
Notes for current implementation:
- TODO fix normalization
- TODO extract longer ngrams (see paper above, viterbi algo can be used)
- TODO AD TEST: prop (Node c _e f) = c == Map.size f
- AD: Real ngrams extraction test
from Gargantext.Text.Terms import extractTermsUnsupervised
...
...
@@ -39,7 +41,7 @@ Notes for current implementation:
module
Gargantext.Text.Eleve
where
import
Debug.Trace
(
trace
)
--
import Debug.Trace (trace)
-- import Debug.SimpleReflect
import
Control.Lens
hiding
(
levels
,
children
)
...
...
@@ -63,9 +65,9 @@ nan = 0 / 0
noNaNs
::
P
.
RealFloat
e
=>
[
e
]
->
[
e
]
noNaNs
=
filter
(
not
.
P
.
isNaN
)
updateIf
NaN
::
P
.
RealFloat
e
=>
e
->
e
->
e
updateIf
NaN
e0
e
|
P
.
isNaN
e
=
e0
|
otherwise
=
e
updateIf
Defined
::
P
.
RealFloat
e
=>
e
->
e
->
e
updateIf
Defined
e0
e
|
P
.
isNaN
e
=
e0
|
otherwise
=
e
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
...
...
@@ -73,9 +75,6 @@ sim x y = x == y || (P.isNaN x && P.isNaN y)
subst
::
Entropy
e
=>
(
e
,
e
)
->
e
->
e
subst
(
src
,
dst
)
x
|
sim
src
x
=
dst
|
otherwise
=
x
------------------------------------------------------------------------
type
Entropy
e
=
...
...
@@ -88,9 +87,9 @@ type Entropy e =
------------------------------------------------------------------------
-- | Example and tests for development
data
I
e
=
I
{
_info_entropy
::
e
{
_info_entropy
::
e
,
_info_entropy_var
::
e
,
_info_autonomy
::
e
,
_info_autonomy
::
e
}
instance
Show
e
=>
Show
(
I
e
)
where
...
...
@@ -117,22 +116,14 @@ isTerminal :: Token -> Bool
isTerminal
(
Terminal
_
)
=
True
isTerminal
(
NonTerminal
_
)
=
False
chunkAlongEleve
::
Int
->
[
a
]
->
[[
a
]]
chunkAlongEleve
n
xs
=
L
.
take
n
<$>
L
.
tails
xs
toToken'
::
Int
->
[
Text
]
->
[[
Token
]]
toToken'
n
input
=
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
$
toToken
input
toToken
::
[
Text
]
->
[
Token
]
toToken
xs
=
Terminal
Start
:
(
NonTerminal
<$>
xs
)
<>
[
Terminal
Stop
]
parseToken
::
Text
->
Token
parseToken
"<start>"
=
Terminal
Start
parseToken
"<stop>"
=
Terminal
Stop
parseToken
t
=
NonTerminal
t
toToken
::
[
Text
]
->
[
Token
]
toToken
xs
=
Terminal
Start
:
(
NonTerminal
<$>
xs
)
<>
[
Terminal
Stop
]
printToken
::
Token
->
Text
printToken
=
f
where
...
...
@@ -192,20 +183,20 @@ entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) childre
where
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
------------------------------------------------------------------------
-- | TODO if stdv == 0 then not defined (NaN)
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
--normalizeLevel m v e = if v == 0 then nan else (e - m) / v
{- Unused
nodeChildren :: Trie k e -> Map k (Trie k e)
nodeChildren (Node _ _ cs) = cs
nodeChildren (Leaf _) = Map.empty
-}
class
IsTrie
trie
where
buildTrie
::
Entropy
e
=>
Int
->
[
Text
]
->
trie
Token
e
buildTrie
::
Entropy
e
=>
[[
Token
]
]
->
trie
Token
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
...
...
@@ -221,7 +212,7 @@ class IsTrie trie where
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance
IsTrie
Trie
where
buildTrie
n
ts
=
trace
(
show
ts
)
$
entropyTrie
isTerminal
$
insertTries
$
toToken'
n
ts
buildTrie
ts
=
entropyTrie
isTerminal
$
insertTries
ts
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
_
(
Leaf
_
)
=
nan
...
...
@@ -290,9 +281,9 @@ nodeEntropyBwdOpt inE (Tries f b) =
mean
$
nodeEntropy
inE
f
:
noNaNs
[
nodeEntropy
inE
b
]
instance
IsTrie
Tries
where
buildTrie
n
tts
=
Tries
{
_fwd
=
buildTrie
n
tts
,
_bwd
=
buildTrie
n
(
reverse
tts
)
}
buildTrie
tts
=
Tries
{
_fwd
=
buildTrie
tts
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
}
nodeEntropy
inE
(
Tries
f
b
)
=
mean
[
nodeEntropy
inE
f
,
nodeEntropy
inE
b
]
...
...
@@ -365,6 +356,25 @@ mainEleve n input = map (map printToken) . split identity (t :: Trie Token Doubl
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-}
chunkAlongEleve
::
Int
->
[
a
]
->
[[
a
]]
chunkAlongEleve
n
xs
=
L
.
take
n
<$>
L
.
tails
xs
toToken'
::
Int
->
[[
Text
]]
->
[[
Token
]]
toToken'
n
input
=
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
))
<$>
toToken
<$>
input
---------------------------------------------
{-
set_entropy_vars :: Entropy e => Getting e i e -> (e -> i -> o) -> Tries Token i -> Trie Token o
set_entropy_vars inE modE tries@(Tries fwd _bwd) =
mapTree (\k -> modE $ nodeEntropy inE (findTrieR k tries)) [] fwd
mapTree :: ([Token] -> t -> e) -> [Token] -> Trie Token t -> Trie Token e
mapTree f k t = go f k t
where
go _ _ (Leaf c) = Leaf c
go f k (Node c i children) = Node c (f k i) (Map.mapWithKey (\k'-> go f (k <> [k'])) children)
-}
---------------------------------------------
type
Checks
e
=
[(
Text
,
Int
,
e
,
e
,
e
,
e
,
e
,
e
,
e
,
e
,
e
)]
...
...
@@ -405,7 +415,7 @@ testEleve debug n output checks = do
inp
=
toToken
<$>
input
t
::
Tries
Token
Double
t
=
buildTrie
n
(
L
.
concat
input
)
t
=
buildTrie
(
toToken'
n
input
)
evt
::
Tries
Token
(
I
Double
)
evt
=
evTrie
identity
set_entropy_var
t
...
...
@@ -432,19 +442,16 @@ testEleve debug n output checks = do
nt'
=
findTrie
ns
nt
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy"
entropy
(
nodeEntropyBwdOpt
info_entropy
nt'
)
check
sim
"ev"
ev
(
nodeEntropy
info_entropy_var
nt'
)
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
nt'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
nt'
))
check
(
==
)
"count"
count
(
_node_count
(
_fwd
t'
))
check
sim
"entropy"
entropy
(
nodeEntropyBwdOpt
info_entropy
nt'
)
check
sim
"ev"
ev
(
nodeEntropy
info_entropy_var
nt'
)
check
sim
"autonomy"
autonomy
(
nodeEntropy
info_autonomy
nt'
)
check
sim
"fwd_entropy"
fwd_entropy
(
nodeEntropy
info_entropy
(
_fwd
nt'
))
check
sim
"fwd_ev"
fwd_ev
(
nodeEntropy
info_entropy_var
(
_fwd
nt'
))
check
sim
"fwd_autonomy"
fwd_autonomy
(
nodeEntropy
info_autonomy
(
_fwd
nt'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
info_entropy
(
_bwd
nt'
))
check
sim
"fwd_autonomy"
fwd_autonomy
(
nodeEntropy
info_autonomy
(
_fwd
nt'
))
check
sim
"bwd_entropy"
bwd_entropy
(
nodeEntropy
identity
(
_bwd
t'
))
check
sim
"bwd_ev"
bwd_ev
(
nodeEntropy
info_entropy_var
(
_bwd
nt'
))
check
sim
"bwd_autonomy"
bwd_autonomy
(
nodeEntropy
info_autonomy
(
_bwd
nt'
))
check
sim
"bwd_autonomy"
bwd_autonomy
(
nodeEntropy
info_autonomy
(
_bwd
nt'
))
-- | TODO real data is a list of tokenized sentences
example0
,
example1
,
example2
,
example3
,
example4
,
example5
,
example6
::
[
Text
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment