Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
dbff4c96
Commit
dbff4c96
authored
May 02, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT/STEM] copying Porter lib.
parent
0c615cc4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
154 additions
and
0 deletions
+154
-0
En.hs
src/Gargantext/Ngrams/Stem/En.hs
+154
-0
No files found.
src/Gargantext/Ngrams/Stem/En.hs
0 → 100644
View file @
dbff4c96
module
Language.Porter
(
stem
,
fixstem
)
where
import
Control.Monad
import
Data.Maybe
import
Data.List
isConsonant
str
i
|
c
`
elem
`
"aeiou"
=
False
|
c
==
'y'
=
i
==
0
||
isVowel
str
(
i
-
1
)
|
otherwise
=
True
where
c
=
str
!!
i
isVowel
=
(
not
.
)
.
isConsonant
byIndex
fun
str
=
fun
str
[
0
..
length
str
-
1
]
measure
=
length
.
filter
not
.
init
.
(
True
:
)
.
map
head
.
group
.
byIndex
(
map
.
isConsonant
)
containsVowel
=
byIndex
(
any
.
isVowel
)
endsWithDouble
=
startsWithDouble
.
reverse
where
startsWithDouble
l
|
length
l
<
2
=
False
|
otherwise
=
let
(
x
:
y
:
_
)
=
l
in
x
==
y
&&
x
`
notElem
`
"aeiou"
cvc
word
|
length
word
<
3
=
False
|
otherwise
=
isConsonant
word
lastIndex
&&
isVowel
word
(
lastIndex
-
1
)
&&
isConsonant
word
(
lastIndex
-
2
)
&&
last
word
`
notElem
`
"wxy"
where
lastIndex
=
length
word
-
1
statefulReplace
predicate
str
end
replacement
|
end
`
isSuffixOf
`
str
=
Just
replaced
|
otherwise
=
Nothing
where
part
=
take
(
length
str
-
length
end
)
str
replaced
|
predicate
part
=
Right
(
part
++
replacement
)
|
otherwise
=
Left
str
replaceEnd
predicate
str
end
replacement
=
do
result
<-
statefulReplace
predicate
str
end
replacement
return
(
either
id
id
result
)
findStem
f
word
pairs
=
msum
$
map
(
uncurry
(
replaceEnd
f
word
))
pairs
measureGT
=
flip
((
>
)
.
measure
)
step1a
word
=
fromMaybe
word
result
where
result
=
findStem
(
const
True
)
word
[(
"sses"
,
"ss"
),
(
"ies"
,
"i"
),
(
"ss"
,
"ss"
),
(
"s"
,
""
)]
beforeStep1b
word
=
fromMaybe
(
Left
word
)
result
where
cond23
x
=
do
{
v
<-
x
;
either
(
const
Nothing
)
(
return
.
Right
)
v
}
cond1
x
=
do
{
v
<-
x
;
return
(
Left
v
)
}
result
=
cond1
(
replaceEnd
(
measureGT
0
)
word
"eed"
"ee"
)
`
mplus
`
cond23
(
statefulReplace
containsVowel
word
"ed"
""
)
`
mplus
`
cond23
(
statefulReplace
containsVowel
word
"ing"
""
)
afterStep1b
word
=
fromMaybe
word
result
where
double
=
endsWithDouble
word
&&
not
(
any
((`
isSuffixOf
`
word
)
.
return
)
"lsz"
)
mEq1AndCvc
=
measure
word
==
1
&&
cvc
word
iif
cond
val
=
if
cond
then
Just
val
else
Nothing
result
=
findStem
(
const
True
)
word
[(
"at"
,
"ate"
),
(
"bl"
,
"ble"
),
(
"iz"
,
"ize"
)]
`
mplus
`
iif
double
(
init
word
)
`
mplus
`
iif
mEq1AndCvc
(
word
++
"e"
)
step1b
=
either
id
afterStep1b
.
beforeStep1b
step1c
word
=
fromMaybe
word
result
where
result
=
replaceEnd
containsVowel
word
"y"
"i"
step1
=
step1c
.
step1b
.
step1a
step2
word
=
fromMaybe
word
result
where
result
=
findStem
(
measureGT
0
)
word
[
(
"ational"
,
"ate"
)
,
(
"tional"
,
"tion"
)
,
(
"enci"
,
"ence"
)
,
(
"anci"
,
"ance"
)
,
(
"izer"
,
"ize"
)
,
(
"bli"
,
"ble"
)
,
(
"alli"
,
"al"
)
,
(
"entli"
,
"ent"
)
,
(
"eli"
,
"e"
)
,
(
"ousli"
,
"ous"
)
,
(
"ization"
,
"ize"
)
,
(
"ation"
,
"ate"
)
,
(
"ator"
,
"ate"
)
,
(
"alism"
,
"al"
)
,
(
"iveness"
,
"ive"
)
,
(
"fulness"
,
"ful"
)
,
(
"ousness"
,
"ous"
)
,
(
"aliti"
,
"al"
)
,
(
"iviti"
,
"ive"
)
,
(
"biliti"
,
"ble"
)
,
(
"logi"
,
"log"
)
]
step3
word
=
fromMaybe
word
result
where
result
=
findStem
(
measureGT
0
)
word
[
(
"icate"
,
"ic"
)
,
(
"ative"
,
""
)
,
(
"alize"
,
"al"
)
,
(
"iciti"
,
"ic"
)
,
(
"ical"
,
"ic"
)
,
(
"ful"
,
""
)
,
(
"ness"
,
""
)
]
step4
word
=
fromMaybe
word
result
where
gt1andST
str
=
(
measureGT
1
)
str
&&
any
((`
isSuffixOf
`
str
)
.
return
)
"st"
findGT1
=
findStem
(
measureGT
1
)
word
.
map
(
flip
(,)
""
)
result
=
(
findGT1
[
"al"
,
"ance"
,
"ence"
,
"er"
,
"ic"
,
"able"
,
"ible"
,
"ant"
,
"ement"
,
"ment"
,
"ent"
])
`
mplus
`
(
findStem
gt1andST
word
[(
"ion"
,
""
)])
`
mplus
`
(
findGT1
[
"ou"
,
"ism"
,
"ate"
,
"iti"
,
"ous"
,
"ive"
,
"ize"
])
step5a
word
=
fromMaybe
word
result
where
test
str
=
(
measureGT
1
str
)
||
((
measure
str
==
1
)
&&
(
not
$
cvc
str
))
result
=
replaceEnd
test
word
"e"
""
step5b
word
=
fromMaybe
word
result
where
cond
s
=
last
s
==
'l'
&&
measureGT
1
s
result
=
replaceEnd
cond
word
"l"
""
step5
=
step5b
.
step5a
allSteps
=
step5
.
step4
.
step3
.
step2
.
step1
stem
s
|
length
s
<
3
=
s
|
otherwise
=
allSteps
s
fixpoint
f
x
=
let
fx
=
f
x
in
if
fx
==
x
then
x
else
fixpoint
f
fx
fixstem
=
fixpoint
stem
{-
main :: IO ()
main = do
content <- readFile "input.txt"
writeFile "output.txt" $ unlines $ map stem $ lines content
-}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment