Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
7ee073cf
Unverified
Commit
7ee073cf
authored
Jun 13, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Eleve: cleanup and restore mainEleve
parent
aca641f9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
60 additions
and
100 deletions
+60
-100
Eleve.hs
src/Gargantext/Text/Eleve.hs
+60
-100
No files found.
src/Gargantext/Text/Eleve.hs
View file @
7ee073cf
...
@@ -98,8 +98,8 @@ makeLenses ''I
...
@@ -98,8 +98,8 @@ makeLenses ''I
type
ModEntropy
i
o
e
=
(
e
->
e
)
->
i
->
o
type
ModEntropy
i
o
e
=
(
e
->
e
)
->
i
->
o
set_autonomy
::
Entropy
e
=>
ModEntropy
e
(
I
e
)
e
set_autonomy
::
Entropy
e
=>
ModEntropy
(
I
e
)
(
I
e
)
e
set_autonomy
f
e
=
I
e
nan
(
f
e
)
set_autonomy
f
e
i
=
i
&
info_autonomy
.~
fe
(
i
^.
info_entropy_var
)
set_entropy_var
::
Entropy
e
=>
Setter
e
(
I
e
)
e
e
set_entropy_var
::
Entropy
e
=>
Setter
e
(
I
e
)
e
e
set_entropy_var
f
e
=
(
\
ev
->
I
e
ev
nan
)
<$>
f
e
set_entropy_var
f
e
=
(
\
ev
->
I
e
ev
nan
)
<$>
f
e
...
@@ -141,9 +141,6 @@ data Trie k e
...
@@ -141,9 +141,6 @@ data Trie k e
makeLenses
''
T
rie
makeLenses
''
T
rie
insertTries
::
Ord
k
=>
[[
k
]]
->
Trie
k
()
insertTries
=
L
.
foldr
insertTrie
emptyTrie
insertTrie
::
Ord
k
=>
[
k
]
->
Trie
k
()
->
Trie
k
()
insertTrie
::
Ord
k
=>
[
k
]
->
Trie
k
()
->
Trie
k
()
insertTrie
[]
n
=
n
{
_node_count
=
_node_count
n
+
1
}
insertTrie
[]
n
=
n
{
_node_count
=
_node_count
n
+
1
}
insertTrie
(
x
:
xs
)
(
Leaf
c
)
=
mkTrie
(
c
+
1
)
$
Map
.
singleton
x
$
insertTrie
xs
emptyTrie
insertTrie
(
x
:
xs
)
(
Leaf
c
)
=
mkTrie
(
c
+
1
)
$
Map
.
singleton
x
$
insertTrie
xs
emptyTrie
...
@@ -169,19 +166,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
...
@@ -169,19 +166,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
entropyTrie
::
Entropy
e
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
entropyTrie
_
(
Leaf
c
)
=
Leaf
c
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
where
children'
=
Map
.
toList
children
sum_count
=
sum
$
_node_count
.
snd
<$>
children'
e
|
sum_count
==
0
=
nan
|
otherwise
=
sum
$
f
<$>
children'
f
(
k
,
child
)
=
if
pred
k
then
chc
*
P
.
logBase
2
(
fromIntegral
c
)
else
-
chc
*
P
.
logBase
2
chc
where
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
------------------------------------------------------------------------
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
...
@@ -193,25 +177,50 @@ nodeChildren (Leaf _) = Map.empty
...
@@ -193,25 +177,50 @@ nodeChildren (Leaf _) = Map.empty
-}
-}
chunkAlongEleve
::
Int
->
[
a
]
->
[[
a
]]
chunkAlongEleve
n
xs
=
L
.
take
n
<$>
L
.
tails
xs
data
Direction
=
Backward
|
Forward
buildTrie
::
Direction
->
Int
->
[[
Token
]]
->
Trie
Token
()
buildTrie
d
n
sentences
=
L
.
foldr
insertTrie
emptyTrie
.
L
.
concat
$
(
filter
(
/=
[
Terminal
(
term
d
)])
.
chunkAlongEleve
(
n
+
1
)
.
order
d
)
<$>
sentences
where
order
Forward
=
identity
order
Backward
=
reverse
term
Forward
=
Stop
term
Backward
=
Start
class
IsTrie
trie
where
class
IsTrie
trie
where
buildTrie
::
Entropy
e
=>
[[
Token
]]
->
trie
Token
e
entropyTrie
::
Entropy
e
=>
(
k
->
Bool
)
->
trie
k
()
->
trie
k
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
findTrieR
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
printTrie
::
(
Show
i
,
Entropy
e
)
=>
Getting
e
i
e
->
trie
Token
i
->
IO
()
printTrie
::
(
Show
i
,
Entropy
e
)
=>
Getting
e
i
e
->
trie
Token
i
->
IO
()
evTrie
::
Entropy
e
=>
Getting
e
i
e
->
Setter
i
o
e
e
->
trie
k
i
->
trie
k
o
evTrie
::
Entropy
e
=>
Getting
e
i
e
->
Setter
i
o
e
e
->
trie
k
i
->
trie
k
o
normalizeEntropy
::
Entropy
e
normalizeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
trie
k
i
->
trie
k
o
->
trie
k
i
->
trie
k
o
-- UNUSED
--nodeAutonomy :: (Ord k, Entropy e) => Getting e i e -> trie k i -> [k] -> e
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance
IsTrie
Trie
where
instance
IsTrie
Trie
where
buildTrie
ts
=
entropyTrie
isTerminal
$
insertTries
ts
entropyTrie
_
(
Leaf
c
)
=
Leaf
c
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
where
children'
=
Map
.
toList
children
sum_count
=
sum
$
_node_count
.
snd
<$>
children'
e
|
sum_count
==
0
=
nan
|
otherwise
=
sum
$
f
<$>
children'
f
(
k
,
child
)
=
if
pred
k
then
chc
*
P
.
logBase
2
(
fromIntegral
c
)
else
-
chc
*
P
.
logBase
2
chc
where
chc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
inE
(
Node
_
e
_
)
=
e
^.
inE
nodeEntropy
_
(
Leaf
_
)
=
nan
nodeEntropy
_
(
Leaf
_
)
=
nan
...
@@ -220,7 +229,6 @@ instance IsTrie Trie where
...
@@ -220,7 +229,6 @@ instance IsTrie Trie where
nodeChild
_
(
Leaf
_
)
=
emptyTrie
nodeChild
_
(
Leaf
_
)
=
emptyTrie
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
findTrie
ks
t
=
L
.
foldl
(
flip
nodeChild
)
t
ks
findTrieR
=
findTrie
printTrie
inE
t
=
do
printTrie
inE
t
=
do
P
.
putStrLn
.
Tree
.
drawTree
P
.
putStrLn
.
Tree
.
drawTree
...
@@ -271,26 +279,22 @@ data Tries k e = Tries
...
@@ -271,26 +279,22 @@ data Tries k e = Tries
makeLenses
''
T
ries
makeLenses
''
T
ries
nodeEntropySafe
::
Entropy
e
=>
Getting
e
i
e
->
Tries
k
i
->
e
buildTries
::
Int
->
[[
Token
]]
->
Tries
Token
()
nodeEntropySafe
inE
(
Tries
f
b
)
=
buildTries
n
sentences
=
Tries
mean
$
noNaNs
[
nodeEntropy
inE
f
,
nodeEntropy
inE
b
]
{
_fwd
=
buildTrie
Forward
n
sentences
,
_bwd
=
buildTrie
Backward
n
sentences
nodeEntropyBwdOpt
::
Entropy
e
=>
Getting
e
i
e
->
Tries
k
i
->
e
}
nodeEntropyBwdOpt
inE
(
Tries
f
b
)
=
mean
$
nodeEntropy
inE
f
:
noNaNs
[
nodeEntropy
inE
b
]
instance
IsTrie
Tries
where
instance
IsTrie
Tries
where
buildTrie
tts
=
Tries
{
_fwd
=
buildTrie
tts
,
_bwd
=
buildTrie
(
reverse
<$>
tts
)
}
nodeEntropy
inE
(
Tries
f
b
)
=
mean
[
nodeEntropy
inE
f
,
nodeEntropy
inE
b
]
nodeEntropy
inE
(
Tries
f
b
)
=
mean
[
nodeEntropy
inE
f
,
nodeEntropy
inE
b
]
findTrie
ks
=
onTries
(
findTrie
ks
)
findTrie
ks
(
Tries
f
b
)
=
Tries
(
findTrie
ks
f
)
(
findTrie
(
reverse
ks
)
b
)
findTrieR
ks
(
Tries
f
b
)
=
Tries
(
findTrieR
ks
f
)
(
findTrieR
(
reverse
ks
)
b
)
nodeChild
=
onTries
.
nodeChild
nodeChild
=
onTries
.
nodeChild
entropyTrie
=
onTries
.
entropyTrie
evTrie
inE
setEV
=
onTries
$
evTrie
inE
setEV
evTrie
inE
setEV
=
onTries
$
evTrie
inE
setEV
normalizeEntropy
inE
=
onTries
.
normalizeEntropy
inE
normalizeEntropy
inE
=
onTries
.
normalizeEntropy
inE
...
@@ -324,14 +328,14 @@ split inE t (x0:xs0) = go [x0] xs0
...
@@ -324,14 +328,14 @@ split inE t (x0:xs0) = go [x0] xs0
else
mayCons
pref
$
go
[
x
]
xs
else
mayCons
pref
$
go
[
x
]
xs
where
where
prefx
=
pref
<>
[
x
]
prefx
=
pref
<>
[
x
]
pt
=
findTrie
R
pref
t
pt
=
findTrie
pref
t
pxt
=
findTrie
R
prefx
t
pxt
=
findTrie
prefx
t
xt
=
findTrie
R
[
x
]
t
xt
=
findTrie
[
x
]
t
ept
=
ne
pt
ept
=
ne
pt
-- ^ entropy of the current prefix
-- ^ entropy of the current prefix
ext
=
ne
xt
ext
=
ne
xt
-- ^ entropy of [x]
-- ^ entropy of [x]
epxt
=
ne
pxt
epxt
=
ne
pxt
-- ^ entropy of the current prefix plus x
-- ^ entropy of the current prefix plus x
acc
=
P
.
isNaN
ept
||
P
.
isNaN
ext
||
not
(
P
.
isNaN
epxt
)
-- && (epxt > ept + ext)
acc
=
P
.
isNaN
ept
||
P
.
isNaN
ext
||
not
(
P
.
isNaN
epxt
)
-- && (epxt > ept + ext)
...
@@ -345,36 +349,16 @@ split inE t0 ts =
...
@@ -345,36 +349,16 @@ split inE t0 ts =
maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
-}
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
mainEleve
::
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve
::
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve
_
_
=
[]
mainEleve
n
input
=
map
(
map
printToken
)
.
split
info_autonomy
(
t
::
Tries
Token
(
I
Double
))
<$>
inp
{-
mainEleve n input = map (map printToken) . split identity (t :: Trie Token Double) <$> inp
where
where
inp
=
toToken
<$>
input
inp
=
toToken
<$>
input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
t
=
normalizeEntropy
info_entropy_var
set_autonomy
-}
.
evTrie
identity
set_entropy_var
.
entropyTrie
isTerminal
chunkAlongEleve
::
Int
->
[
a
]
->
[[
a
]]
$
buildTries
n
inp
chunkAlongEleve
n
xs
=
L
.
take
n
<$>
L
.
tails
xs
data
Order
=
Backward
|
Forward
toToken'
::
Order
->
Int
->
[[
Text
]]
->
[[
Token
]]
toToken'
o
n
input
=
L
.
concat
$
(
filter
(
/=
[
Terminal
(
term
o
)])
.
chunkAlongEleve
(
n
+
1
)
.
(
order
o
)
)
<$>
toToken
<$>
input
where
order
Forward
=
identity
order
Backward
=
reverse
term
Forward
=
Stop
term
Backward
=
Start
---------------------------------------------
---------------------------------------------
...
@@ -384,24 +368,11 @@ type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
...
@@ -384,24 +368,11 @@ type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
Checks
e
->
IO
Bool
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
Checks
e
->
IO
Bool
testEleve
debug
n
output
checks
=
do
testEleve
debug
n
output
checks
=
do
let
let
{-
pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
| ps <- L.nub $ [ c
| m <- [1..n]
, cs <- chunkAlong m 1 <$> inp
, c <- cs
]
]
-}
res
=
map
(
map
printToken
)
.
split
info_autonomy
nt
<$>
inp
res
=
map
(
map
printToken
)
.
split
info_autonomy
nt
<$>
inp
when
debug
$
do
when
debug
$
do
P
.
putStrLn
(
show
input
)
P
.
putStrLn
$
show
input
-- forM_ pss (P.putStrLn . show)
P
.
putStrLn
""
P
.
putStrLn
""
printTrie
info_entropy
nt
printTrie
info_entropy
nt
-- P.putStrLn ""
-- P.putStrLn "Entropy Var:"
-- printTrie identity t''
P
.
putStrLn
""
P
.
putStrLn
""
P
.
putStrLn
"Splitting:"
P
.
putStrLn
"Splitting:"
P
.
putStrLn
$
show
res
P
.
putStrLn
$
show
res
...
@@ -414,23 +385,11 @@ testEleve debug n output checks = do
...
@@ -414,23 +385,11 @@ testEleve debug n output checks = do
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
<$>
input
inp
=
toToken
<$>
input
t
::
Tries
Token
Double
t
=
-- buildTrie (toToken' n input)
Tries
{
_fwd
=
buildTrie
(
toToken'
Forward
n
input
)
,
_bwd
=
buildTrie
(
toToken'
Backward
n
input
)
}
evt
::
Tries
Token
(
I
Double
)
evt
=
evTrie
identity
set_entropy_var
t
nt
::
Tries
Token
(
I
Double
)
nt
::
Tries
Token
(
I
Double
)
nt
=
normalizeEntropy
info_entropy_var
(
\
fe
i
->
i
&
info_autonomy
.~
fe
(
i
^.
info_entropy_var
))
evt
nt
=
normalizeEntropy
info_entropy_var
set_autonomy
.
evTrie
identity
set_entropy_var
-- t'' :: Trie Token Double
.
entropyTrie
isTerminal
-- t'' = set_entropy_vars info_autonomy (\e _i -> e) nt
$
buildTries
n
inp
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
check
f
msg
ref
my
=
check
f
msg
ref
my
=
if
f
ref
my
if
f
ref
my
...
@@ -439,7 +398,7 @@ testEleve debug n output checks = do
...
@@ -439,7 +398,7 @@ testEleve debug n output checks = do
checker
(
ngram
,
count
,
entropy
,
ev
,
autonomy
,
fwd_entropy
,
fwd_ev
,
fwd_autonomy
,
bwd_entropy
,
bwd_ev
,
bwd_autonomy
)
=
do
checker
(
ngram
,
count
,
entropy
,
ev
,
autonomy
,
fwd_entropy
,
fwd_ev
,
fwd_autonomy
,
bwd_entropy
,
bwd_ev
,
bwd_autonomy
)
=
do
let
ns
=
parseToken
<$>
T
.
words
ngram
let
ns
=
parseToken
<$>
T
.
words
ngram
nt'
=
findTrie
R
ns
nt
nt'
=
findTrie
ns
nt
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
P
.
putStrLn
$
" "
<>
T
.
unpack
ngram
<>
":"
check
(
==
)
"count"
count
(
_node_count
(
_fwd
nt'
))
check
(
==
)
"count"
count
(
_node_count
(
_fwd
nt'
))
...
@@ -513,7 +472,8 @@ checks2 = []
...
@@ -513,7 +472,8 @@ checks2 = []
runTests
::
IO
()
runTests
::
IO
()
runTests
=
runTests
=
forM_
forM_
[(
"example0"
,
2
,
example0
,
checks0
)
[(
"example0"
,
3
,
example0
,
checks0
)
,(
"example0"
,
2
,
example0
,
[]
)
,(
"example1"
,
2
,
example1
,
[]
)
,(
"example1"
,
2
,
example1
,
[]
)
,(
"example2"
,
3
,
example2
,
checks2
)
,(
"example2"
,
3
,
example2
,
checks2
)
,(
"example3"
,
2
,
example3
,
[]
)
,(
"example3"
,
2
,
example3
,
[]
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment