Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
196
Issues
196
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
b9156ba3
Unverified
Commit
b9156ba3
authored
May 24, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Eleve: better but still not working
parent
934e77be
Pipeline
#411
canceled with stage
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
101 additions
and
61 deletions
+101
-61
Eleve.hs
src/Gargantext/Text/Eleve.hs
+101
-61
No files found.
src/Gargantext/Text/Eleve.hs
View file @
b9156ba3
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-
{-
Implementation of EleVe Python version of papers:
Implementation of EleVe Python version of papers:
NP:
* The node count is correct and we should not regress on this front.
-}
-}
module
Gargantext.Text.Eleve
where
module
Gargantext.Text.Eleve
where
import
Debug.Trace
(
trace
)
import
Debug.Trace
(
trace
)
import
Debug.SimpleReflect
--
import Debug.SimpleReflect
import
Control.Lens
(
Lens
,
Lens
'
,
ASetter
,
Getting
,
(
^.
),
(
^?
),
(
&
),
(
.~
),
(
%~
),
view
,
makeLenses
,
_Just
)
import
Control.Monad
(
foldM
)
import
Control.Monad
(
foldM
)
import
Data.Ord
(
Ord
)
import
Data.Ord
(
Ord
)
import
qualified
Data.List
as
L
import
qualified
Data.List
as
L
...
@@ -18,9 +23,9 @@ import Data.Monoid
...
@@ -18,9 +23,9 @@ import Data.Monoid
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Maybe
(
fromMaybe
,
catMaybes
)
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
import
Gargantext.Prelude
import
Gargantext.Prelude
hiding
(
cs
)
import
qualified
Data.Tree
as
Tree
import
qualified
Data.Tree
as
Tree
import
Data.Tree
(
Tree
)
import
Data.Tree
(
Tree
)
import
qualified
Prelude
as
P
(
putStrLn
,
logBase
,
String
)
import
qualified
Prelude
as
P
(
putStrLn
,
logBase
,
String
)
...
@@ -29,17 +34,34 @@ import qualified Prelude as P (putStrLn, logBase, String)
...
@@ -29,17 +34,34 @@ import qualified Prelude as P (putStrLn, logBase, String)
-- TODO maybe add Leaf
-- TODO maybe add Leaf
-- NP: I think Leaf is an optimisation (less data, a tiny bit more code and time)
-- NP: I think Leaf is an optimisation (less data, a tiny bit more code and time)
--test = split t ts
data
I
e
=
I
{
_info_entropy
::
e
,
_info_norm_entropy
::
e
,
_info_norm_entropy'
::
e
}
instance
Show
e
=>
Show
(
I
e
)
where
show
(
I
e
n
n'
)
=
show
(
e
,
n
,
n'
)
makeLenses
''
I
type
ModEntropy
i
o
e
=
(
e
->
e
)
->
i
->
o
setNormEntropy
::
ModEntropy
e
(
I
e
)
e
setNormEntropy
f
e
=
I
e
(
f
e
)
e
-- (panic "setNormEntropy")
test
n
example
=
do
test
n
example
=
do
let
let
ex
=
toToken
n
example
ex
=
toToken
n
example
t
=
buildTrie
$
chunkAlong
n
1
ex
t
=
buildTrie
$
chunkAlong
n
1
ex
nt
=
normalizeEntropy
identity
setNormEntropy
(
t
::
Trie
Token
Double
)
nt'
=
normalizeEntropy'
info_entropy
(
\
f
->
info_norm_entropy'
%~
f
)
nt
P
.
putStrLn
$
Tree
.
drawTree
P
.
putStrLn
$
Tree
.
drawTree
$
fmap
show
$
fmap
show
$
toTree
(
NonTerminal
""
)
t
$
toTree
(
NonTerminal
""
)
nt'
pure
$
map
unToken
$
split
t
t
[]
ex
pure
$
map
unToken
$
split
info_entropy
nt'
ex
example'
=
T
.
words
"New York and New York"
example'
=
T
.
words
"New York and New York"
...
@@ -68,31 +90,32 @@ data Trie k e
...
@@ -68,31 +90,32 @@ data Trie k e
|
Leaf
{
_node_count
::
Int
}
|
Leaf
{
_node_count
::
Int
}
deriving
(
Show
)
deriving
(
Show
)
toTree
::
k
->
Trie
k
e
->
Tree
(
k
,
Int
,
e
)
toTree
::
k
->
Trie
k
e
->
Tree
(
k
,
Int
,
Maybe
e
)
toTree
k
(
Node
c
e
cs
)
=
Tree
.
Node
(
k
,
c
,
e
)
(
map
(
uncurry
toTree
)
$
Map
.
toList
cs
)
toTree
k
(
Leaf
c
)
=
Tree
.
Node
(
k
,
c
,
Nothing
)
[]
toTree
k
(
Node
c
e
cs
)
=
Tree
.
Node
(
k
,
c
,
Just
e
)
(
map
(
uncurry
toTree
)
$
Map
.
toList
cs
)
-- emptyTrie :: Trie k e
-- emptyTrie ::
(Ord k, Monoid e) =>
Trie k e
emptyTrie
::
(
Ord
k
,
Monoid
e
)
=>
Int
->
Trie
k
e
-- emptyTrie = Node 0 mempty mempty
--emptyTrie n = Node n mempty mempty
emptyTrie
::
Trie
k
e
emptyTrie
=
Leaf
emptyTrie
=
Leaf
0
mkTrie
::
Monoid
e
=>
Int
->
Map
k
(
Trie
k
e
)
->
Trie
k
e
mkTrie
::
Monoid
e
=>
Int
->
Map
k
(
Trie
k
e
)
->
Trie
k
e
mkTrie
c
children
mkTrie
c
children
{-
| Map.null children = Leaf c
|
Map
.
null
children
=
Leaf
c
| otherwise
-}
=
Node
c
mempty
children
|
otherwise
=
Node
c
mempty
children
insertTrie
::
Ord
k
=>
[
k
]
->
Trie
k
()
->
Trie
k
()
insertTrie
::
Ord
k
=>
[
k
]
->
Trie
k
()
->
Trie
k
()
insertTrie
[]
n
=
n
{
_node_count
=
_node_count
n
+
1
}
insertTrie
[]
n
=
n
{
_node_count
=
_node_count
n
+
1
}
-- insertTrie (x:xs) (Leaf c) = mkTrie (c+1) (Map.singleton x $ insertTrie xs emptyTrie)
insertTrie
(
x
:
xs
)
(
Leaf
c
)
=
mkTrie
(
c
+
1
)
$
Map
.
singleton
x
$
insertTrie
xs
emptyTrie
insertTrie
(
x
:
xs
)
(
Node
c
_e
children
)
=
mkTrie
(
c
+
1
)
$
Map
.
alter
f
x
children
insertTrie
(
x
:
xs
)
(
Node
c
_e
children
)
=
mkTrie
(
c
+
1
)
$
Map
.
alter
f
x
children
where
where
f
=
Just
.
insertTrie
xs
.
fromMaybe
(
emptyTrie
0
)
f
=
Just
.
insertTrie
xs
.
fromMaybe
emptyTrie
insertTries
::
Ord
k
=>
[[
k
]]
->
Trie
k
()
insertTries
::
Ord
k
=>
[[
k
]]
->
Trie
k
()
insertTries
=
L
.
foldr
insertTrie
(
emptyTrie
1
)
insertTries
=
L
.
foldr
insertTrie
emptyTrie
entropyTrie
::
(
Num
e
,
Floating
e
)
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
entropyTrie
::
(
Num
e
,
Floating
e
)
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
--
entropyTrie _ (Leaf c) = Leaf c
entropyTrie
_
(
Leaf
c
)
=
Leaf
c
entropyTrie
pred
(
Node
c
_e
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
entropyTrie
pred
(
Node
c
_e
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
where
where
e
=
sum
$
map
f
$
Map
.
toList
children
e
=
sum
$
map
f
$
Map
.
toList
children
...
@@ -101,63 +124,80 @@ entropyTrie pred (Node c _e children) = Node c e (map (entropyTrie pred) childre
...
@@ -101,63 +124,80 @@ entropyTrie pred (Node c _e children) = Node c e (map (entropyTrie pred) childre
where
where
cfc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
cfc
=
fromIntegral
(
_node_count
child
)
/
fromIntegral
c
normalizeEntropy
::
(
Fractional
e
,
Floating
e
,
Show
e
)
=>
Trie
k
e
->
Trie
k
e
normalizeEntropy
::
(
Fractional
e
,
Floating
e
,
Show
e
)
-- normalizeEntropy (Leaf c) = Leaf c
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
Trie
k
i
->
Trie
k
o
normalizeEntropy
(
Node
c
e
children
)
=
normalizeEntropy
inE
modE
=
go
$
modE
identity
trace
(
show
$
L
.
length
es
)
$
Node
c
e
$
map
(
normalizeLevel
m
v
.
normalizeEntropy
)
children
where
where
es
=
map
_node_entropy
$
Map
.
elems
children
go
_
(
Leaf
c
)
=
Leaf
c
m
=
mean
es
go
f
(
Node
c
i
children
)
|
not
(
Map
.
null
children
)
=
v
=
deviation
es
-- trace (show $ L.length es) $
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
m
v
)
<$>
children
where
es
=
[
i'
^.
inE
|
Node
_
i'
_
<-
Map
.
elems
children
]
m
=
mean
es
v
=
deviation
es
normalizeLevel
::
(
Fractional
e
,
Floating
e
,
Show
e
)
=>
e
->
e
->
Trie
k
e
->
Trie
k
e
normalizeLevel
::
(
Fractional
e
,
Floating
e
,
Show
e
)
-- normalizeLevel _ _ (Leaf c) = Leaf c
=>
e
->
e
->
e
->
e
--normalizeLevel m v n = n { _node_entropy = (_node_entropy n - m) }
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
normalizeLevel
m
v
n
=
trace
(
show
(
_node_entropy
n
,
m
,
v
))
$
n
{
_node_entropy
=
(
_node_entropy
n
-
m
)
/
v
}
buildTrie
::
(
Floating
e
,
Show
e
)
=>
[[
Token
]]
->
Trie
Token
e
buildTrie
::
(
Floating
e
,
Show
e
)
=>
[[
Token
]]
->
Trie
Token
e
buildTrie
=
normalizeEntropy
.
entropyTrie
(
==
Terminal
)
.
insertTries
buildTrie
=
entropyTrie
(
==
Terminal
)
.
insertTries
subForest
::
Trie
k
e
->
[
Trie
k
e
]
subForest
::
Trie
k
e
->
[
Trie
k
e
]
--
subForest (Leaf _) = []
subForest
(
Leaf
_
)
=
[]
subForest
(
Node
_
_
children
)
=
Map
.
elems
children
subForest
(
Node
_
_
children
)
=
Map
.
elems
children
nodeEntropy
::
Trie
k
e
->
Maybe
e
nodeEntropy
(
Node
_
e
_
)
=
Just
e
nodeEntropy
(
Leaf
_
)
=
Nothing
nodeChildren
::
Trie
k
e
->
Map
k
(
Trie
k
e
)
nodeChildren
(
Node
_
_
cs
)
=
cs
nodeChildren
(
Leaf
_
)
=
Map
.
empty
nodeChild
::
Ord
k
=>
k
->
Trie
k
e
->
Maybe
(
Trie
k
e
)
nodeChild
k
(
Node
_
_
cs
)
=
Map
.
lookup
k
cs
nodeChild
_
(
Leaf
_
)
=
Nothing
levels
::
Trie
k
e
->
[[
Trie
k
e
]]
levels
::
Trie
k
e
->
[[
Trie
k
e
]]
levels
=
L
.
takeWhile
(
not
.
L
.
null
)
.
L
.
iterate
(
L
.
concatMap
subForest
)
.
pure
levels
=
L
.
takeWhile
(
not
.
L
.
null
)
.
L
.
iterate
(
L
.
concatMap
subForest
)
.
pure
entropyLevels
::
Trie
k
e
->
[[
e
]]
entropyLevels
::
Getting
e
i
e
->
Trie
k
i
->
[[
e
]]
entropyLevels
=
fmap
(
fmap
_node_e
ntropy
)
.
levels
entropyLevels
inE
=
fmap
(
fmap
(
view
inE
)
.
catMaybes
.
fmap
nodeE
ntropy
)
.
levels
normalizeEntropy'
::
(
Floating
e
,
Show
e
)
=>
Trie
k
e
->
Trie
k
e
--fwd :: Getting a s a -> ASetter s t u3 a -> s -> t
normalizeEntropy'
t
=
go
(
entropyLevels
t
)
t
--fwd inE outE s = s & outE .~ (s ^. inE)
normalizeEntropy'
::
(
Fractional
e
,
Floating
e
,
Show
e
)
=>
Getting
e
i
e
->
ModEntropy
i
o
e
->
Trie
k
i
->
Trie
k
o
normalizeEntropy'
inE
modE
t
=
go
(
modE
identity
)
(
entropyLevels
inE
t
)
t
where
where
go
::
(
Floating
e
,
Show
e
)
=>
[[
e
]]
->
Trie
k
e
->
Trie
k
e
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
[]
_
=
panic
"normalizeEntropy' empty levels"
go
_
_
(
Leaf
c
)
=
Leaf
c
-- go _ (Leaf c) = Leaf c
go
_
(
[]
:
_
)
_
=
panic
"normalizeEntropy': empty level"
go
(
es
:
ess
)
(
Node
c
e
children
)
=
go
f
(
es
:
ess
)
(
Node
c
i
children
)
=
Node
c
e
(
normalizeLevel
m
v
.
go
ess
<$>
children
)
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
m
v
)
ess
<$>
children
where
where
m
=
mean
es
m
=
mean
es
v
=
deviation
es
v
=
deviation
es
buildTrie'
::
(
Floating
e
,
Show
e
)
=>
[[
Token
]]
->
Trie
Token
e
buildTrie'
=
normalizeEntropy'
.
entropyTrie
(
==
Terminal
)
.
insertTries
------------------------------------------------------------------------
------------------------------------------------------------------------
autonomie
::
Trie
Token
e
->
Token
->
e
split
::
(
Num
e
,
Ord
e
,
Show
e
)
=>
Lens'
i
e
->
Trie
Token
i
->
[
Token
]
->
[[
Token
]]
autonomie
trie
t
=
case
(
Map
.
lookup
t
(
_node_children
trie
))
of
split
inE
t0
=
go
t0
[]
Nothing
->
panic
$
"Gargantext.Text.Ngrams: autonomie"
<>
(
cs
$
show
t
)
where
Just
a
->
_node_entropy
a
ne
d
t
=
fromMaybe
d
(
nodeEntropy
t
^?
_Just
.
inE
)
go
_
pref
[]
=
[
reverse
pref
]
------------------------------------------------------------------------
go
t
pref
(
x
:
xs
)
=
case
nodeChild
x
t
of
Nothing
->
reverse
pref
:
go
t0
[
x
]
xs
split
::
(
Num
e
,
Ord
e
)
=>
Trie
Token
e
->
Trie
Token
e
->
[
Token
]
->
[
Token
]
->
[[
Token
]]
Just
a
->
case
nodeChild
x
t0
of
split
_
_
pref
[]
=
[
reverse
pref
]
Nothing
->
panic
"TODO"
split
t0
t
pref
(
x
:
xs
)
=
case
Map
.
lookup
x
$
_node_children
t
of
Just
xt0
->
Nothing
->
reverse
pref
:
split
t0
t0
[
x
]
xs
let
et
=
ne
(
panic
"t"
)
t
Just
a
->
case
Map
.
lookup
x
$
_node_children
t0
of
ext0
=
ne
(
panic
"xt0"
)
xt0
Nothing
->
panic
"TODO"
-- reverse pref : split t0 t0 [] xs
ea
=
ne
(
-
42
)
a
Just
xt0
->
case
_node_entropy
t
+
_node_entropy
xt0
>
_node_entropy
a
of
in
trace
(
show
(
et
,
ext0
,
ea
))
$
True
->
split
t0
a
(
x
:
pref
)
xs
case
et
+
ext0
>
ea
of
False
->
reverse
pref
:
split
t0
xt0
[
x
]
xs
True
->
go
a
(
x
:
pref
)
xs
False
->
reverse
pref
:
go
xt0
[
x
]
xs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment