1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
{-|
Module : Gargantext.Text.Terms.Multi.Lang.En
Description : English Grammar rules to group postag tokens.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Rule-based grammars are computed in this English module in order to group
the tokens into extracted terms.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Terms.Multi.Lang.En (group)
where
import Gargantext.Prelude
import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi.Group
------------------------------------------------------------------------
-- | English rule grammar: groups tagged tokens by running a fixed series of
-- 'group2' passes, each pass consuming the output of the previous one.
--
-- An empty token list is returned as-is, without running any pass.
--
-- NOTE(review): 'group2' is imported, not defined in this module; presumably
-- @group2 a b@ fuses adjacent tokens tagged @a@ then @b@ — confirm against
-- its definition.
group :: [TokenTag] -> [TokenTag]
group []     = []
group tokens = afterNpNp
  where
    -- Passes are listed in application order; every pass works on the
    -- result of the one directly above it.
    afterJjCc = group2 JJ CC tokens
    afterJjJj = group2 JJ JJ afterJjCc
    afterJjNp = group2 JJ NP afterJjJj
    -- disabled rule, would run at this point: group2 VB NP
    afterInDt = group2 IN DT afterJjNp
    -- disabled rule, would run at this point: group2 NP IN
    afterNpVb = group2 NP VB afterInDt
    afterNpNp = group2 NP NP afterNpVb
------------------------------------------------------------------------
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
--
--groupNgrams (x:xs) = (x:(groupNgrams xs))