Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f12df281
Commit
f12df281
authored
Nov 22, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[NGRAMS] Improve ngrams extraction across prepositions ("of" in English, "de" in French).
parent
ea51f50d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
32 additions
and
5 deletions
+32
-5
En.hs
src/Data/Gargantext/Ngrams/Lang/En.hs
+6
-1
Fr.hs
src/Data/Gargantext/Ngrams/Lang/Fr.hs
+5
-1
En.hs
test/Ngrams/Lang/En.hs
+15
-3
Fr.hs
test/Ngrams/Lang/Fr.hs
+6
-0
No files found.
src/Data/Gargantext/Ngrams/Lang/En.hs
View file @
f12df281
...
...
@@ -47,6 +47,7 @@ groupNgrams ((x,"JJ",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN",
groupNgrams
((
x
,
"NNP"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
y
,
"NP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
y
,
"NNS"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NP"
,
_
)
:
(
y
,
"NP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
...
...
@@ -57,7 +58,11 @@ groupNgrams ((x,"NN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN",
-- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
-- > should be two separate ngrams: (antiinflammatory activity) and (analgesic activity)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
"of"
,
"IN"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
"of"
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
y
,
"NNP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
det
,
"DT"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
det
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
det
,
"DT"
,
_
)
:
(
y
,
"NNP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
det
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
_
,
"PERSON"
)
:
(
y
,
yy
,
"PERSON"
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
yy
,
"PERSON"
)
:
xs
)
groupNgrams
((
x
,
_
,
"ORGANIZATION"
)
:
(
y
,
yy
,
"ORGANIZATION"
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
yy
,
"ORGANIZATION"
)
:
xs
)
...
...
src/Data/Gargantext/Ngrams/Lang/Fr.hs
View file @
f12df281
...
...
@@ -22,6 +22,8 @@ selectNgrams xs = pf selectNgrams' xs
groupNgrams
::
[(
Text
,
Text
,
Text
)]
->
[(
Text
,
Text
,
Text
)]
groupNgrams
[]
=
[]
--groupNgrams ((_,"DET",_):xs) = groupNgrams xs
-- "Groupe : nom commun et adjectifs avec conjonction"
groupNgrams
((
n
,
"NC"
,
n'
)
:
(
j1
,
"ADJ"
,
_
)
:
(
_
,
"CC"
,
_
)
:
(
j2
,
"ADJ"
,
_
)
:
xs
)
=
groupNgrams
(
n1
:
n2
:
xs
)
where
...
...
@@ -38,7 +40,9 @@ groupNgrams ((n,"N",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (
-- groupNgrams ((j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",j2'):xs) = groupNgrams ((j1 <> " " <> j2, "ADJ", j2'):xs)
-- Groupe : Nom commun + préposition + Nom commun
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
p
,
"P"
,
_
)
:
(
n2
,
"NC"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
p
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
p
,
"P"
,
_
)
:
(
n2
,
"NC"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
p
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
p
,
"P"
,
_
)
:
(
n2
,
"NPP"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
p
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
prep
,
"P"
,
_
)
:
(
det
,
"DET"
,
_
)
:
(
n2
,
"NPP"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
prep
<>
" "
<>
det
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
-- Groupe : Plusieurs adjectifs successifs
groupNgrams
((
x
,
"ADJ"
,
_
)
:
(
y
,
"ADJ"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"ADJ"
,
yy
)
:
xs
)
...
...
test/Ngrams/Lang/En.hs
View file @
f12df281
...
...
@@ -16,9 +16,21 @@ ngramsExtractionTest = hspec $ do
it
"
\"
Of
\"
seperates two ngrams"
$
do
t1
<-
pm
(
selectNgrams
EN
)
<$>
extractNgrams
EN
(
textTest
!!
0
)
t1
`
shouldBe
`
[[(
"Alcoholic extract
"
,
"NN"
,
"O"
),(
"
Kaempferia galanga"
,
"NN"
,
"O"
),(
"analgesic activities"
,
"NN+CC"
,
"O"
),(
"antiinflammatory activities"
,
"NN+CC"
,
"O"
),(
"animal models"
,
"NN"
,
"O"
)]]
t1
`
shouldBe
`
[[(
"Alcoholic extract
of
Kaempferia galanga"
,
"NN"
,
"O"
),(
"analgesic activities"
,
"NN+CC"
,
"O"
),(
"antiinflammatory activities"
,
"NN+CC"
,
"O"
),(
"animal models"
,
"NN"
,
"O"
)]]
it
"Tests the conjunction of coordination in two ngrams with its adjectives"
$
do
t2
<-
pm
(
selectNgrams
EN
)
<$>
extractNgrams
EN
(
textTest
!!
2
)
t2
`
shouldBe
`
[[(
"Acute activities"
,
"NN+CC"
,
"O"
),(
"sub acute inflammatory activities"
,
"NN+CC"
,
"O"
),(
"rats"
,
"NNS"
,
"O"
),(
"carrageenan"
,
"NN"
,
"O"
),(
"paw edema"
,
"NN"
,
"O"
),(
"cotton pellet"
,
"NN"
,
"O"
),(
"granuloma models"
,
"NN"
,
"O"
)]]
it
"Tests nouns with preposition and determinants"
$
do
let
t
=
"Donald Trump is president of the United-States of America."
t2
<-
pm
(
selectNgrams
EN
)
<$>
extractNgrams
EN
t
t2
`
shouldBe
`
[[(
"Donald Trump"
,
"NNP"
,
"PERSON"
),(
"president of the United-States of America"
,
"NN"
,
"LOCATION"
)]]
test/Ngrams/Lang/Fr.hs
View file @
f12df281
...
...
@@ -39,6 +39,12 @@ ngramsExtractionTest = hspec $ do
testFr0
<-
pm
(
selectNgrams
FR
)
<$>
(
extractNgrams
FR
)
textFr0
testFr0
`
shouldBe
`
[[(
"problème du jour"
,
"NC"
,
"O"
)]]
it
"Groupe: Nom commun + préposition + déterminant + Nom commun"
$
do
let
textFr0
=
"Emmanuel Macron est le président de la France."
testFr0
<-
pm
(
selectNgrams
FR
)
<$>
(
extractNgrams
FR
)
textFr0
testFr0
`
shouldBe
`
[[(
"Emmanuel Macron"
,
"NPP"
,
"PERSON"
),(
"président de la France"
,
"NC"
,
"LOCATION"
)]]
it
"Groupe: Nom commun + préposition + Nom commun + prép + Nom commun"
$
do
let
textFr1
=
"L'heure d'arrivée des coureurs dépend de la météo du jour."
testFr1
<-
pm
(
selectNgrams
FR
)
<$>
(
extractNgrams
FR
)
textFr1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment