Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
71dce4c8
Unverified
Commit
71dce4c8
authored
Jun 06, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Eleve...
parent
7bc24c2e
Pipeline
#445
failed with stage
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
46 additions
and
51 deletions
+46
-51
Eleve.hs
src/Gargantext/Text/Eleve.hs
+46
-51
No files found.
src/Gargantext/Text/Eleve.hs
View file @
71dce4c8
...
...
@@ -45,14 +45,14 @@ import Debug.Trace (trace)
-- import Debug.SimpleReflect
import
Control.Lens
(
Lens
'
,
Getting
,
(
^.
),
(
^?
),
view
,
makeLenses
,
_Just
)
import
Control.Monad
(
fo
ldM
,
mapM_
,
fo
rM_
)
import
Control.Monad
(
forM_
)
import
Data.Ord
(
Ord
)
import
qualified
Data.List
as
L
import
Data.Monoid
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
,
catMaybes
)
import
Data.Maybe
(
fromMaybe
)
import
qualified
Data.Map
as
Map
import
Gargantext.Prelude
hiding
(
cs
)
import
qualified
Data.Tree
as
Tree
...
...
@@ -161,7 +161,11 @@ updateIfDefined :: P.RealFloat e => e -> e -> e
updateIfDefined
e0
e
|
P
.
isNaN
e
=
e0
|
otherwise
=
e
entropyTrie
::
Floating
e
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
subst
::
Entropy
e
=>
(
e
,
e
)
->
e
->
e
subst
(
src
,
dst
)
x
|
sim
src
x
=
dst
|
otherwise
=
x
entropyTrie
::
Entropy
e
=>
(
k
->
Bool
)
->
Trie
k
()
->
Trie
k
e
entropyTrie
_
(
Leaf
c
)
=
Leaf
c
entropyTrie
pred
(
Node
c
()
children
)
=
Node
c
e
(
map
(
entropyTrie
pred
)
children
)
where
...
...
@@ -178,7 +182,7 @@ normalizeLevel = checkDiff (go . noNaNs)
where
-- checkDiff f es e = let e' = f es e in if e == e' then e' else trace ("normalizeLevel: diff " <> show e <> " " <> show e') e'
checkDiff
=
identity
go
[]
=
panic
"normalizeLevel: impossible"
--
go [] = panic "normalizeLevel: impossible"
-- trace "normalizeLevel"
-- go [_] = identity
go
es
=
\
e
->
(
e
-
m
)
/
v
...
...
@@ -201,7 +205,7 @@ nodeChildren (Leaf _) = Map.empty
-}
class
IsTrie
trie
where
buildTrie
::
Floating
e
=>
[[
Token
]]
->
trie
Token
e
buildTrie
::
Entropy
e
=>
[[
Token
]]
->
trie
Token
e
nodeEntropy
::
Entropy
e
=>
Getting
e
i
e
->
trie
k
i
->
e
nodeChild
::
Ord
k
=>
k
->
trie
k
e
->
trie
k
e
findTrie
::
Ord
k
=>
[
k
]
->
trie
k
e
->
trie
k
e
...
...
@@ -229,9 +233,12 @@ instance IsTrie Trie where
where
go
_
[]
_
=
panic
"normalizeEntropy' empty levels"
go
_
_
(
Leaf
c
)
=
Leaf
c
go
_
(
[]
:
_
)
_
=
panic
"normalizeEntropy': empty level"
go
f
(
es
:
ess
)
(
Node
c
i
children
)
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
es
)
ess
<$>
children
-- go _ ([] : _) _ = panic "normalizeEntropy': empty level"
go
f
(
es
:
ess
)
(
Node
c
i
children
)
-- | any (sim (i ^. inE)) es
=
Node
c
(
f
i
)
$
go
(
modE
$
normalizeLevel
es
)
ess
<$>
children
-- | otherwise
-- = panic "NOT an elem"
{-
...
...
@@ -361,11 +368,19 @@ testEleve debug n output checks = do
res
=
map
(
map
printToken
)
.
split
info_autonomy
nt
<$>
inp
when
debug
$
do
P
.
putStrLn
(
show
input
)
-- mapM_ (P.putStrLn . show) pss
-- forM_ pss (P.putStrLn . show)
P
.
putStrLn
""
P
.
putStrLn
"Levels:"
forM_
(
entropyLevels
identity
(
_fwd
t
))
$
\
level
->
P
.
putStrLn
$
" "
<>
show
level
P
.
putStrLn
""
-- printTrie nt
P
.
putStrLn
"Forward:"
printTrie
(
_fwd
nt
)
P
.
putStrLn
""
P
.
putStrLn
"Backward:"
printTrie
(
_bwd
nt
)
P
.
putStrLn
""
P
.
putStrLn
"Splitting:"
P
.
putStrLn
$
show
res
forM_
checks
checker
pure
$
expected
==
res
...
...
@@ -375,7 +390,7 @@ testEleve debug n output checks = do
expected
=
fmap
(
T
.
splitOn
"-"
)
<$>
out
input
=
(
T
.
splitOn
"-"
=<<
)
<$>
out
inp
=
toToken
<$>
input
t
=
buildTrie
$
L
.
concat
$
chunkAlongEleve
(
n
+
2
)
<$>
inp
t
=
buildTrie
$
L
.
concat
$
(
filter
(
/=
[
Terminal
Stop
])
.
chunkAlongEleve
(
n
+
2
)
)
<$>
inp
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt
=
normalizeEntropy
identity
set_autonomy
t
...
...
@@ -417,51 +432,31 @@ example6 = ["le-petit chat"
checks0
,
checks2
::
[(
Text
,
Int
,
Double
,
Double
,
Double
,
Double
,
Double
)]
checks0
=
{-
[("<start> New", 1, nan, nan, nan, nan, 0.0)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, nan, 1.584962500721156)
checks0
=
[(
"<start>"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,(
"New"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
1.584962500721156
,
0.0
)
,(
"York"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
0.0
,
1.584962500721156
)
,(
"is"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,(
"and"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,(
"<stop>"
,
0
,
nan
,
nan
,
nan
,
0.0
,
nan
)
,(
"<start> New"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
nan
,
1.584962500721156
)
,(
"York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, nan, 1.584962500721156)
,(
"York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, nan, 1.584962500721156)
,(
"York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)
]-}
[(
"<start>"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
1.584962500721156
,
0.0
)
,
(
"York"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
0.0
,
1.584962500721156
)
,
(
"is"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,
(
"New"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
1.584962500721156
,
0.0
)
,
(
"York"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
0.0
,
1.584962500721156
)
,
(
"and"
,
1
,
0.0
,
-
2.113283334294875
,
-
0.5000000000000002
,
0.0
,
0.0
)
,
(
"New"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
1.584962500721156
,
0.0
)
,
(
"York"
,
3
,
0.792481250360578
,
-
1.3208020839342969
,
0.7499999999999999
,
0.0
,
1.584962500721156
)
,
(
"<stop>"
,
0
,
nan
,
nan
,
nan
,
0.0
,
nan
)]
<>
[(
"<start> New"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
nan
,
1.584962500721156
)
,
(
"York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
nan
,
1.584962500721156
)
,
(
"York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York"
,
3
,
1.584962500721156
,
1.584962500721156
,
1.4142135623730951
,
nan
,
1.584962500721156
)
,
(
"York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)]
<>
[(
"<start> New York"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"York is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"is New York"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"York and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"and New York"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,
(
"New York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)
,
(
"York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)
,
(
"<stop>"
,
0
,
nan
,
nan
,
nan
,
0.0
,
nan
)
,
(
""
,
9
,
2.113283334294875
,
nan
,
nan
,
2.113283334294875
,
2.113283334294875
)]
,(
"<start> New York"
,
1
,
nan
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York is"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"York is New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"is New York"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York and"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"York and New"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"and New York"
,
1
,
0.0
,
nan
,
nan
,
nan
,
0.0
)
,(
"New York <stop>"
,
1
,
nan
,
nan
,
nan
,
nan
,
nan
)
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment