Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f12df281
Commit
f12df281
authored
Nov 22, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[NGRAMS] improving ngrams extraction with prep (of/de) respectively in eng/fr.
parent
ea51f50d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
32 additions
and
5 deletions
+32
-5
En.hs
src/Data/Gargantext/Ngrams/Lang/En.hs
+6
-1
Fr.hs
src/Data/Gargantext/Ngrams/Lang/Fr.hs
+5
-1
En.hs
test/Ngrams/Lang/En.hs
+15
-3
Fr.hs
test/Ngrams/Lang/Fr.hs
+6
-0
No files found.
src/Data/Gargantext/Ngrams/Lang/En.hs
View file @
f12df281
...
...
@@ -47,6 +47,7 @@ groupNgrams ((x,"JJ",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN",
groupNgrams
((
x
,
"NNP"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
y
,
"NP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
y
,
"NNS"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NP"
,
_
)
:
(
y
,
"NP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
...
...
@@ -57,7 +58,11 @@ groupNgrams ((x,"NN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN",
-- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
-- > should be (antiinflammatory activity) <> (analgesic activity)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
"of"
,
"IN"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
"of"
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
y
,
"NNP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
det
,
"DT"
,
_
)
:
(
y
,
"NN"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
det
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
"NN"
,
_
)
:
(
o
,
"IN"
,
_
)
:
(
det
,
"DT"
,
_
)
:
(
y
,
"NNP"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
o
<>
" "
<>
det
<>
" "
<>
y
,
"NN"
,
yy
)
:
xs
)
groupNgrams
((
x
,
_
,
"PERSON"
)
:
(
y
,
yy
,
"PERSON"
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
yy
,
"PERSON"
)
:
xs
)
groupNgrams
((
x
,
_
,
"ORGANIZATION"
)
:
(
y
,
yy
,
"ORGANIZATION"
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
yy
,
"ORGANIZATION"
)
:
xs
)
...
...
src/Data/Gargantext/Ngrams/Lang/Fr.hs
View file @
f12df281
...
...
@@ -22,6 +22,8 @@ selectNgrams xs = pf selectNgrams' xs
groupNgrams
::
[(
Text
,
Text
,
Text
)]
->
[(
Text
,
Text
,
Text
)]
groupNgrams
[]
=
[]
--groupNgrams ((_,"DET",_):xs) = groupNgrams xs
-- "Groupe : nom commun et adjectifs avec conjonction"
groupNgrams
((
n
,
"NC"
,
n'
)
:
(
j1
,
"ADJ"
,
_
)
:
(
_
,
"CC"
,
_
)
:
(
j2
,
"ADJ"
,
_
)
:
xs
)
=
groupNgrams
(
n1
:
n2
:
xs
)
where
...
...
@@ -38,7 +40,9 @@ groupNgrams ((n,"N",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (
-- groupNgrams ((j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",j2'):xs) = groupNgrams ((j1 <> " " <> j2, "ADJ", j2'):xs)
-- Groupe : Nom commun + préposition + Nom commun
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
p
,
"P"
,
_
)
:
(
n2
,
"NC"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
p
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
p
,
"P"
,
_
)
:
(
n2
,
"NC"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
p
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
p
,
"P"
,
_
)
:
(
n2
,
"NPP"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
p
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
groupNgrams
((
n1
,
"NC"
,
_
)
:
(
prep
,
"P"
,
_
)
:
(
det
,
"DET"
,
_
)
:
(
n2
,
"NPP"
,
n2'
)
:
xs
)
=
groupNgrams
((
n1
<>
" "
<>
prep
<>
" "
<>
det
<>
" "
<>
n2
,
"NC"
,
n2'
)
:
xs
)
-- Groupe : Plusieurs adjectifs successifs
groupNgrams
((
x
,
"ADJ"
,
_
)
:
(
y
,
"ADJ"
,
yy
)
:
xs
)
=
groupNgrams
((
x
<>
" "
<>
y
,
"ADJ"
,
yy
)
:
xs
)
...
...
test/Ngrams/Lang/En.hs
View file @
f12df281
...
...
@@ -16,9 +16,21 @@ ngramsExtractionTest = hspec $ do
it
"
\"
Of
\"
seperates two ngrams"
$
do
t1
<-
pm
(
selectNgrams
EN
)
<$>
extractNgrams
EN
(
textTest
!!
0
)
t1
`
shouldBe
`
[[(
"Alcoholic extract
"
,
"NN"
,
"O"
),(
"
Kaempferia galanga"
,
"NN"
,
"O"
),(
"analgesic activities"
,
"NN+CC"
,
"O"
),(
"antiinflammatory activities"
,
"NN+CC"
,
"O"
),(
"animal models"
,
"NN"
,
"O"
)]]
t1
`
shouldBe
`
[[(
"Alcoholic extract
of
Kaempferia galanga"
,
"NN"
,
"O"
),(
"analgesic activities"
,
"NN+CC"
,
"O"
),(
"antiinflammatory activities"
,
"NN+CC"
,
"O"
),(
"animal models"
,
"NN"
,
"O"
)]]
it
"Tests the conjunction of coordination in two ngrams with its adjectives"
$
do
t2
<-
pm
(
selectNgrams
EN
)
<$>
extractNgrams
EN
(
textTest
!!
2
)
t2
`
shouldBe
`
[[(
"Acute activities"
,
"NN+CC"
,
"O"
),(
"sub acute inflammatory activities"
,
"NN+CC"
,
"O"
),(
"rats"
,
"NNS"
,
"O"
),(
"carrageenan"
,
"NN"
,
"O"
),(
"paw edema"
,
"NN"
,
"O"
),(
"cotton pellet"
,
"NN"
,
"O"
),(
"granuloma models"
,
"NN"
,
"O"
)]]
it
"Tests nouns with preposition and determinants"
$
do
let
t
=
"Donald Trump is president of the United-States of America."
t2
<-
pm
(
selectNgrams
EN
)
<$>
extractNgrams
EN
t
t2
`
shouldBe
`
[[(
"Donald Trump"
,
"NNP"
,
"PERSON"
),(
"president of the United-States of America"
,
"NN"
,
"LOCATION"
)]]
test/Ngrams/Lang/Fr.hs
View file @
f12df281
...
...
@@ -39,6 +39,12 @@ ngramsExtractionTest = hspec $ do
testFr0
<-
pm
(
selectNgrams
FR
)
<$>
(
extractNgrams
FR
)
textFr0
testFr0
`
shouldBe
`
[[(
"problème du jour"
,
"NC"
,
"O"
)]]
it
"Groupe: Nom commun + préposition + déterminant + Nom commun"
$
do
let
textFr0
=
"Emmanuel Macron est le président de la France."
testFr0
<-
pm
(
selectNgrams
FR
)
<$>
(
extractNgrams
FR
)
textFr0
testFr0
`
shouldBe
`
[[(
"Emmanuel Macron"
,
"NPP"
,
"PERSON"
),(
"président de la France"
,
"NC"
,
"LOCATION"
)]]
it
"Groupe: Nom commun + préposition + Nom commun + prép + Nom commun"
$
do
let
textFr1
=
"L'heure d'arrivée des coureurs dépend de la météo du jour."
testFr1
<-
pm
(
selectNgrams
FR
)
<$>
(
extractNgrams
FR
)
textFr1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment