Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
842cbf68
Unverified
Commit
842cbf68
authored
Jun 13, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ElEve: alternative split
parent
7ee073cf
Pipeline
#472
failed with stage
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
6 deletions
+21
-6
Eleve.hs
src/Gargantext/Text/Eleve.hs
+21
-6
No files found.
src/Gargantext/Text/Eleve.hs
View file @
842cbf68
...
@@ -115,6 +115,9 @@ isTerminal :: Token -> Bool
...
@@ -115,6 +115,9 @@ isTerminal :: Token -> Bool
isTerminal
(
Terminal
_
)
=
True
isTerminal
(
Terminal
_
)
=
True
isTerminal
(
NonTerminal
_
)
=
False
isTerminal
(
NonTerminal
_
)
=
False
-- | Collect the payload of every 'NonTerminal' token, keeping the input
-- order. Tokens built with any other constructor are skipped.
nonTerminals :: [Token] -> [Text]
nonTerminals []                      = []
nonTerminals (NonTerminal nt : rest) = nt : nonTerminals rest
nonTerminals (_ : rest)              = nonTerminals rest
parseToken
::
Text
->
Token
parseToken
::
Text
->
Token
parseToken
"<start>"
=
Terminal
Start
parseToken
"<start>"
=
Terminal
Start
parseToken
"<stop>"
=
Terminal
Stop
parseToken
"<stop>"
=
Terminal
Stop
...
@@ -310,14 +313,16 @@ onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
...
@@ -310,14 +313,16 @@ onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
onTries
h
(
Tries
f
b
)
=
Tries
(
h
f
)
(
h
b
)
onTries
h
(
Tries
f
b
)
=
Tries
(
h
f
)
(
h
b
)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Prepend a chunk to a list of chunks unless the chunk is empty, in which
-- case the list of chunks is returned unchanged.
mayCons :: [a] -> [[a]] -> [[a]]
mayCons chunk rest = case chunk of
  [] -> rest
  _  -> chunk : rest
{-
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split _ _ [] = []
split _ _ [] = []
split inE t (Terminal Start:xs) = split inE t xs
split inE t (Terminal Start:xs) = split inE t xs
split inE t (x0:xs0) = go [x0] xs0
split inE t (x0:xs0) = go [x0] xs0
where
where
mayCons
[]
xss
=
xss
mayCons
xs
xss
=
xs
:
xss
go pref [] = [pref]
go pref [] = [pref]
go pref (Terminal Stop:_) = [pref]
go pref (Terminal Stop:_) = [pref]
go _ (Terminal Start:_) = panic "split impossible"
go _ (Terminal Start:_) = panic "split impossible"
...
@@ -337,11 +342,21 @@ split inE t (x0:xs0) = go [x0] xs0
...
@@ -337,11 +342,21 @@ split inE t (x0:xs0) = go [x0] xs0
-- ^ entropy of [x]
-- ^ entropy of [x]
epxt = ne pxt
epxt = ne pxt
-- ^ entropy of the current prefix plus x
-- ^ entropy of the current prefix plus x
acc
=
P
.
isNaN
ept
||
P
.
isNaN
ext
||
not
(
P
.
isNaN
epxt
)
-- && (epxt >
ept + ext
)
acc = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt >
mean [ept, ext]
)
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
ne = nodeEntropy inE
ne = nodeEntropy inE
-}
-- | Greedily segment a token sequence into chunks of at most @n@ tokens.
--
-- At each step, every non-empty prefix of the next @n@ tokens is looked up
-- in the tries with 'findTrie' and scored with 'nodeEntropy' through the
-- lens @inE@. The prefix selected by 'maximumWith' becomes the next chunk
-- (presumably the best-scoring one -- confirm against 'maximumWith'), and
-- the remaining tokens are split recursively.
--
-- Each chunk is reported as the texts of its 'NonTerminal' tokens only; a
-- chunk that ends up empty after that filtering is dropped by 'mayCons'.
--
-- A window size @n@ below 1 yields no candidate prefix at all. Instead of
-- handing an empty candidate list to 'maximumWith' (or looping forever on an
-- empty prefix), such a window is treated like a window of 1: the next
-- single token is taken as the chunk.
split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
split _ _   _ []  = []
split _ _   _ [t] = pure <$> nonTerminals [t]
split n inE t ts@(t0 : _) =
  nonTerminals pref `mayCons` split n inE t (drop (length pref) ts)
  where
    -- Non-empty prefixes of the current window. 'L.inits' always starts with
    -- the empty list, so 'L.tail' cannot fail here.
    candidates = L.tail . L.inits . take n $ ts

    -- The chunk to emit next. It is never empty, which guarantees that the
    -- recursive call above works on a strictly shorter token list.
    pref = case candidates of
      [] -> [t0]
      _  -> maximumWith (\ks -> nodeEntropy inE $ findTrie ks t) candidates
{-
{-
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
...
@@ -352,7 +367,7 @@ split inE t0 ts =
...
@@ -352,7 +367,7 @@ split inE t0 ts =
------------------------------------------------------------------------
------------------------------------------------------------------------
mainEleve
::
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve
::
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve
n
input
=
map
(
map
printToken
)
.
split
info_autonomy
(
t
::
Tries
Token
(
I
Double
))
<$>
inp
mainEleve
n
input
=
split
n
info_autonomy
(
t
::
Tries
Token
(
I
Double
))
<$>
inp
where
where
inp
=
toToken
<$>
input
inp
=
toToken
<$>
input
t
=
normalizeEntropy
info_entropy_var
set_autonomy
t
=
normalizeEntropy
info_entropy_var
set_autonomy
...
@@ -368,7 +383,7 @@ type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
...
@@ -368,7 +383,7 @@ type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
Checks
e
->
IO
Bool
testEleve
::
e
~
Double
=>
Bool
->
Int
->
[
Text
]
->
Checks
e
->
IO
Bool
testEleve
debug
n
output
checks
=
do
testEleve
debug
n
output
checks
=
do
let
let
res
=
map
(
map
printToken
)
.
split
info_autonomy
nt
<$>
inp
res
=
split
n
info_autonomy
nt
<$>
inp
when
debug
$
do
when
debug
$
do
P
.
putStrLn
$
show
input
P
.
putStrLn
$
show
input
P
.
putStrLn
""
P
.
putStrLn
""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment