[RENAME/ORG] ngrams -> terms.

c0914f9a · Alexandre Delanoë · d12b405e · c0914f9a · c0914f9a · c0914f9a
Commit c0914f9a authored May 19, 2018 by Alexandre Delanoë
25 changed files
--- a/app/CleanCsvCorpus.hs
+++ b/app/CleanCsvCorpus.hs
@@ -7,10 +7,8 @@ Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

-Given a Gargantext CSV File and its Query
-This script cleans and compress the contexts around the main terms of the query.
-
-
+Given a Gargantext CSV File and its Query This script cleans and
+compress the contexts around the main terms of the query.
 -}

 {-# LANGUAGE NoImplicitPrelude #-}
@@ -20,7 +18,7 @@ module CleanCsvCorpus  where
 --import GHC.IO (FilePath)
 import Data.SearchEngine as S
 import qualified Data.Set as S
-
+import Data.Text (pack)
 import Data.Vector (Vector)
 import qualified Data.Vector as V

@@ -50,7 +48,7 @@ main = do
  
  let docs   = toDocs csvDocs
  let engine = insertDocs docs initialDocSearchEngine
-  let docIds = S.query engine q
+  let docIds = S.query engine (map pack q)
  let docs'  = fromDocs $ filterDocs docIds (V.fromList docs)
  
  putStrLn $ "Number of documents after:" <> show (V.length docs')

--- a/app/Main.hs
+++ b/app/Main.hs
@@ -14,13 +14,13 @@ Script to start gargantext with different modes (Dev, Prod, Mock).
 {-# LANGUAGE DataKinds         #-}
 {-# LANGUAGE DeriveGeneric     #-}
 {-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE StandaloneDeriving #-}
 {-# LANGUAGE TypeOperators     #-}

 module Main where

-import Prelude (putStrLn)

 import Options.Generic
 import Data.Text (unpack)

--- a/package.yaml
+++ b/package.yaml
@@ -23,18 +23,10 @@ library:
  - Gargantext
  - Gargantext.Prelude
  - Gargantext.Core
-  - Gargantext.Core.Utils
  - Gargantext.Core.Types
-  - Gargantext.Core.Types.Node
  - Gargantext.Text
-  - Gargantext.Text.Metrics
-  - Gargantext.Text.Metrics.Occurrences
-  - Gargantext.Text.Metrics.FrequentItemSet
-  - Gargantext.Text.Ngrams.PosTagging.CoreNLP
-  - Gargantext.Text.Ngrams.PosTagging.Parser
-  - Gargantext.Text.Ngrams.Token.Text
+  - Gargantext.Text.Search
  - Gargantext.Text.Parsers.CSV
-  - Gargantext.Text.Parsers.Date
  - Gargantext.API
  dependencies:
  - QuickCheck
@@ -124,14 +116,18 @@ library:

 executable:
  main: Main.hs
-  source-dirs:      app
-  ghc-options:         -threaded -rtsopts -with-rtsopts=-N -O2
+  source-dirs: app
+  ghc-options: -threaded -rtsopts -with-rtsopts=-N -O2
  dependencies:
    - base
+    - containers
    - gargantext
+    - vector
+    - cassava
    - ini
    - optparse-generic
    - unordered-containers
+    - full-text-search

 tests:
  garg-test:

--- a/src/Gargantext.hs
+++ b/src/Gargantext.hs
@@ -11,7 +11,7 @@ Portability : POSIX
 -}

 module Gargantext (
-                     module Gargantext.Database,
+                     module Gargantext.Database
 --                     module Gargantext.Ngrams,
 --                     module Gargantext.Utils,
                    ) where

--- a/src/Gargantext/API/FrontEnd.hs
+++ b/src/Gargantext/API/FrontEnd.hs
@@ -6,18 +6,17 @@ License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX
-}

-{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
+Loads all static file for the front-end.

+-}

 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE DataKinds         #-}
 {-# LANGUAGE TemplateHaskell   #-}

 ---------------------------------------------------------------------
-module Gargantext.API.FrontEnd
-      where
+module Gargantext.API.FrontEnd where

 import           Servant.Static.TH (createApiAndServerDecs)


--- a/src/Gargantext/Core/Types.hs
+++ b/src/Gargantext/Core/Types.hs
@@ -11,11 +11,104 @@ Here is a longer description of this module, containing some
 commentary with @some markup@.
 -}

+------------------------------------------------------------------------
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE DeriveGeneric     #-}

 module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
-                          , module Gargantext.Core.Types.Node
-                          ) where
+                             , module Gargantext.Core.Types.Node
+                             , Term, Terms(..)
+                             , TokenTag(..), POS(..), NER(..)
+                             ) where
+
+import GHC.Generics
+import Data.Aeson
+import Data.Monoid
+import qualified Data.Set as S
+import Data.Set (Set, empty)
+
+import Data.Text (Text, unpack)

 import Gargantext.Core.Types.Main
 import Gargantext.Core.Types.Node
+import Gargantext.Prelude
+
+------------------------------------------------------------------------
+
+type Term  = Text
+
+data Terms = Terms { _terms_label :: [Text]
+                   , _terms_stem  :: Set Text
+                   } deriving (Show)
+
+instance Eq Terms where
+  (==) (Terms _ s1) (Terms _ s2) = s1 `S.isSubsetOf` s2
+                           || s2 `S.isSubsetOf` s1
+
+
+------------------------------------------------------------------------
+data Tag = POS | NER
+  deriving (Show, Eq)
+------------------------------------------------------------------------
+data POS = NP
+         | JJ  | VB
+         | CC  | IN | DT
+         | NoPos
+  deriving (Show, Generic, Eq)
+------------------------------------------------------------------------
+instance FromJSON POS where
+  parseJSON = withText "String" (\x -> pure (pos $ unpack x))
+    where
+      pos :: [Char] -> POS
+      pos "NP"  = NP
+      pos "NN"  = NP
+      pos "NC"  = NP
+      pos "NNS" = NP
+      pos "NNP" = NP
+      pos "JJ"  = JJ
+      pos "ADJ" = JJ
+      pos "VB"  = VB
+      pos "VBN" = VB
+      pos "VBG" = VB
+      pos "CC"  = CC
+      pos "IN"  = IN
+      pos "DT"  = DT
+      -- French specific
+      pos "P"  = IN
+      pos  _    = NoPos
+
+instance ToJSON POS
+------------------------------------------------------------------------
+data NER = PERSON | ORGANIZATION | LOCATION | NoNER
+  deriving (Show, Generic)
+------------------------------------------------------------------------
+instance FromJSON NER where
+  parseJSON = withText "String" (\x -> pure (ner $ unpack x))
+    where
+      ner :: [Char] -> NER
+      ner "PERSON"       = PERSON
+      ner "ORGANIZATION" = ORGANIZATION
+      ner "LOCATION"     = LOCATION
+      ner  _             = NoNER
+
+instance ToJSON NER
+
+data TokenTag  = TokenTag { _my_token_word :: [Text]
+                          , _my_token_stem :: Set Text
+                          , _my_token_pos  :: Maybe POS
+                          , _my_token_ner  :: Maybe NER
+                          } deriving (Show)
+
+instance Monoid TokenTag where
+  mempty = TokenTag [] empty Nothing Nothing
+
+  mappend (TokenTag w1 s1 p1 n1) (TokenTag w2 s2 p2 _)
+         = TokenTag (w1 <> w2) (s1 <> s2) p3 n1
+          where
+            p3 = case (p1,p2) of
+                   (Just JJ, Just NP) -> Just NP
+                   (Just VB, Just NP) -> Just NP
+                   _                  -> p1
+
+  mconcat = foldl mappend mempty
+
--- a/src/Gargantext/Text.hs
+++ b/src/Gargantext/Text.hs
@@ -7,10 +7,7 @@ Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

-Ngrams exctration.
-
-Definitions of ngrams.
-n non negative integer
+Text gathers terms in unit of contexts.

 -}

@@ -24,25 +21,17 @@ import qualified Data.Text as DT
 --import Data.Text.IO (readFile)


-import Data.Map.Strict  (Map
-                        , lookupIndex
-                        --, fromList, keys
-                        )
-
 import Data.Text (Text, split)
-import qualified Data.Map.Strict as M (filter)

 import NLP.FullStop (segment)
 -----------------------------------------------------------------
-import Gargantext.Text.Ngrams
-import Gargantext.Text.Metrics.Occurrences

-import qualified Gargantext.Text.Metrics.FrequentItemSet as FIS
+import Gargantext.Core.Types
 import Gargantext.Prelude hiding (filter)
 -----------------------------------------------------------------

-data Group = Group { _group_label  ::  Ngrams
-                   , _group_ngrams :: [Ngrams]
+data Group = Group { _group_label  :: Terms
+                   , _group_terms :: Terms
                   } deriving (Show)


@@ -52,41 +41,10 @@ clean txt = DT.map clean' txt
    clean' '’' = '\''
    clean' c  = c

+
 --noApax :: Ord a => Map a Occ -> Map a Occ
 --noApax m = M.filter (>1) m

-- | /!\ indexes are not the same:
-
-- | Index ngrams from Map
--indexNgram :: Ord a => Map a Occ -> Map Index a
--indexNgram m = fromList (zip [1..] (keys m))
-
-- | Index ngrams from Map
--ngramIndex :: Ord a => Map a Occ -> Map a Index
--ngramIndex m = fromList (zip (keys m) [1..])
-
-indexWith :: Ord a => Map a Occ -> [a] -> [Int]
-indexWith m xs = unMaybe $ map (\x -> lookupIndex x m) xs
-
-indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
-indexIt xs = (m, is)
-  where
-    m  = sumOcc (map occ  xs)
-    is = map    (indexWith m) xs
-
-list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
-list2fis n xs = (m', fs)
-  where
-    (m, is) = indexIt xs
-    m'      = M.filter (>50000) m
-    fs      = FIS.all n is
-
-text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
-text2fis n xs = list2fis n (map ngrams xs)
-
--text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
--text2fisWith = undefined
-
 -------------------------------------------------------------------
 -- Contexts of text
 sentences :: Text -> [Text]
@@ -112,17 +70,18 @@ testText_fr = DT.pack "La fouille de textes ou « l'extraction de connaissances
 -- | Ngrams Test
 -- >>> ngramsTest testText
 -- 248
-ngramsTest :: Text -> Int
-ngramsTest x=  length ws
-  where
-    --txt = concat <$> lines <$> clean <$> readFile filePath
-    txt = clean x
-    -- | Number of sentences
-    --ls   = sentences $ txt
-    -- | Number of monograms used in the full text
-    ws   = ngrams    $ txt
-    -- | stem ngrams
+--ngramsTest :: Text -> Int
+--ngramsTest x =  length ws
+--  where
+--    --txt = concat <$> lines <$> clean <$> readFile filePath
+--    txt = clean x
+--    -- | Number of sentences
+--    --ls   = sentences $ txt
+--    -- | Number of monograms used in the full text
+--    ws   = ngrams    $ txt
+--    -- | stem ngrams
    -- TODO
    -- group ngrams
    --ocs  = occ       $ ws

+
--- a/src/Gargantext/Text/Metrics/FrequentItemSet.hs
+++ b/src/Gargantext/Text/Metrics/FrequentItemSet.hs
@@ -89,3 +89,39 @@ fisWith s f is = unMaybe $ map items2fis $ filter' $ runLCMmatrix is f

 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
+
+
+--
+---- | /!\ indexes are not the same:
+--
+---- | Index ngrams from Map
+----indexNgram :: Ord a => Map a Occ -> Map Index a
+----indexNgram m = fromList (zip [1..] (keys m))
+--
+---- | Index ngrams from Map
+----ngramIndex :: Ord a => Map a Occ -> Map a Index
+----ngramIndex m = fromList (zip (keys m) [1..])
+--
+--indexWith :: Ord a => Map a Occ -> [a] -> [Int]
+--indexWith m xs = unMaybe $ map (\x -> lookupIndex x m) xs
+--
+--indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
+--indexIt xs = (m, is)
+--  where
+--    m  = sumOcc (map occ  xs)
+--    is = map    (indexWith m) xs
+--
+--list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
+--list2fis n xs = (m', fs)
+--  where
+--    (m, is) = indexIt xs
+--    m'      = M.filter (>50000) m
+--    fs      = FIS.all n is
+--
+--text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
+--text2fis n xs = list2fis n (map terms xs)
+--
+----text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
+----text2fisWith = undefined
+--
+
--- a/src/Gargantext/Text/Ngrams/PosTagging.hs
+++ b/src/Gargantext/Text/Ngrams/PosTagging.hs
-{-|
-Module      : Gargantext.Text.Ngrams.PosTagging
-Description : All parsers of Gargantext in one file.
-Copyright   : (c) CNRS, 2017 - present
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-
-Mainly reexport functions in @Data.Text.Metrics@
-}
-
-{-# LANGUAGE NoImplicitPrelude #-}
-
-module Gargantext.Text.Ngrams.PosTagging
-  where
-
-
--- a/src/Gargantext/Text/Ngrams/PosTagging/Lang/En.hs
+++ b/src/Gargantext/Text/Ngrams/PosTagging/Lang/En.hs
-{-|
-Module      : Gargantext.Text.Ngrams.PosTagging.Lang.En
-Description : 
-Copyright   : (c) CNRS, 2017-Present
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-
-Here is a longer description of this module, containing some
-commentary with @some markup@.
-}
-
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
-
-module Gargantext.Text.Ngrams.PosTagging.Lang.En (group)
-  where
-
--import Data.Text (Text)
-import Data.Maybe (Maybe(Just))
-import Gargantext.Text.Ngrams.PosTagging.CoreNLP
-import Gargantext.Prelude
-
-group :: [NgramsTag] -> [NgramsTag]
-group [] = []
-group ntags = group2 NP NP
-           $ group2 NP VB
-           $ group2 NP IN
-           $ group2 IN DT
-           $ group2 VB NP
-           $ group2 JJ NP
-           $ group2 JJ JJ
-           $ group2 JJ CC
-           $ ntags
-
------------------------------------------------------------------------
-- | FIXME p1 and p2 not really taken into account
-group2 :: POS -> POS -> [NgramsTag] -> [NgramsTag]
-group2 p1 p2 (x@(NgramsTag _ _ (Just p1') _):y@(NgramsTag _ _ (Just p2') _):z) =
-  if (p1 == p1') && (p2 == p2')
-     then group2 p1 p2 (x<>y : z)
-     else (x : group2 p1 p2 (y:z))
-group2 p1 p2 (x@(NgramsTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
-group2 _ _ [x@(NgramsTag _ _ (Just _) _)] = [x]
-group2 p1 p2 (x@(NgramsTag _ _ (Just _) _):y@(NgramsTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z))
-group2 _ _ [] = []
-
-- group3 :: POS -> POS -> POS -> [NgramsTag] -> [NgramsTag]
-- group xs = group3 NN IN DT xs
-
-
-- TO BE REMOVED old code
--groupNgrams ((j1,"JJ",j1'):(c1,"CC",c1'):(j2,"JJ",j2'):(j3,"JJ",_):xs)      = groupNgrams (jn1:cc:jn2:xs)
--    where
--        jn j' j'' jn' = (j' <> " " <> j'', "JJ", jn')
--        cc  = (c1, "CC", c1')
--        jn1 = (j1, "JJ", j1')
--        jn2 = jn j2 j3 j2'
--
--groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NN",nn):xs)      = groupNgrams (jn1:jn2:xs)
--    where
--        jn j m mm p = (j <> " " <> m, p, mm)
--        jn1 = jn j1 n nn ("NN+CC" :: Text)
--        jn2 = jn j2 n nn ("NN+CC" :: Text)
--
--groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NNS",nn):xs)      = groupNgrams (jn1:jn2:xs)
--    where
--        jn j m mm p = (j <> " " <> m, p, mm)
--        jn1 = jn j1 n nn ("NN+CC" :: Text)
--        jn2 = jn j2 n nn ("NN+CC" :: Text)
--
--groupNgrams ((x,"JJ",_):(y,"JJ",yy):xs)    = groupNgrams ((x <> " " <> y, "JJ", yy):xs)
--groupNgrams ((x,"JJ",_):(y,"NN",yy):xs)    = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--groupNgrams ((x,"JJ",_):(y,"NNS",yy):xs)   = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--
--groupNgrams ((x,"NNP",_):(y,"NN",yy):xs)   = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--groupNgrams ((x,"NN",_):(y,"NP",yy):xs)    = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--
--groupNgrams ((x,"NN",_):(y,"NNS",yy):xs)   = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--groupNgrams ((x,"NP",_):(y,"NP",yy):xs)    = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--
--groupNgrams ((x,"NN",_):(y,"NN",yy):xs)    = groupNgrams ((x <> " " <> y, "NN", yy):xs)
--
--
---- extractNgrams "Test the antiinflammatory or analgesic activity?"
---- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
---- > should be (antiinflammatory activity) <> (analgesic activity)
--
--groupNgrams ((x,"NN",_):(o,"IN",_):(y,"NN",yy):xs)       = groupNgrams ((x <> " " <> o <> " " <> y, "NN", yy):xs)
--groupNgrams ((x,"NN",_):(o,"IN",_):(y,"NNP",yy):xs)      = groupNgrams ((x <> " " <> o <> " " <> y, "NN", yy):xs)
--
--groupNgrams ((x,"NN",_):(o,"IN",_):(det,"DT",_):(y,"NN",yy):xs)    = groupNgrams ((x <> " " <> o <> " " <> det <> " " <> y, "NN", yy):xs)
--groupNgrams ((x,"NN",_):(o,"IN",_):(det,"DT",_):(y,"NNP",yy):xs)   = groupNgrams ((x <> " " <> o <> " " <> det <> " " <> y, "NN", yy):xs)
--
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs)             = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs)         = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
--
--groupNgrams (x:xs)                                          = (x:(groupNgrams xs))
-
-
--textTest :: [Text]
--textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
--           , "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
--           , " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
--          , "In both models, the standard drug used was aspirin 100 mg/kg. "
--          , "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
--         , "Analgesic activity was studied in rats using hot plate and tail-flick models. "
--         , "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
--         , "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
--         , "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "]
--- a/src/Gargantext/Text/Ngrams/PosTagging/Lang/Fr.hs
+++ b/src/Gargantext/Text/Ngrams/PosTagging/Lang/Fr.hs
-{-|
-Module      : Gargantext.Text.Ngrams.PosTagging.Lang.Fr
-Description : 
-Copyright   : (c) CNRS, 2017-Present
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-
-This @group@ function groups horizontally ngrams in their context of
-sentence according to grammars specific of each language. In english, JJ
-is ADJectiv in french. -
-
-}
-
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
-
-module Gargantext.Text.Ngrams.PosTagging.Lang.Fr (group)
-  where
-
-import Data.Maybe (Maybe(Just))
-import Gargantext.Text.Ngrams.PosTagging.CoreNLP
-import Gargantext.Prelude
-
-
-group :: [NgramsTag] -> [NgramsTag]
-group [] = []
-group ntags = group2 NP NP
-           $ group2 NP VB
-           $ group2 NP IN
-           $ group2 IN DT
-           $ group2 VB NP
-           $ group2 JJ NP
-           $ group2 NP JJ
-           $ group2 JJ JJ
-           $ group2 JJ CC
-           $ ntags
-
------------------------------------------------------------------------
-- | FIXME p1 and p2 not really taken into account
-group2 :: POS -> POS -> [NgramsTag] -> [NgramsTag]
-group2 p1 p2 (x@(NgramsTag _ _ (Just p1') _):y@(NgramsTag _ _ (Just p2') _):z) =
-  if (p1 == p1') && (p2 == p2')
-     then group2 p1 p2 (x<>y : z)
-     else (x : group2 p1 p2 (y:z))
-group2 p1 p2 (x@(NgramsTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
-group2 _ _ [x@(NgramsTag _ _ (Just _) _)] = [x]
-group2 p1 p2 (x@(NgramsTag _ _ (Just _) _):y@(NgramsTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z))
-group2 _ _ [] = []
-
-
------------------------------------------------------------------------
-
-
--module Gargantext.Text.Ngrams.PosTagging.Lang.Fr (selectNgrams, groupNgrams, textTest)
--    where
--
--import Gargantext.Prelude
--import Data.Text (Text)
--import Data.Monoid ((<>))
--
--selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
--selectNgrams xs = filter selectNgrams' xs
--    where
--        selectNgrams' (_,"N"    ,_       ) = True
--        selectNgrams' (_,"NC"   ,_       ) = True
--        selectNgrams' (_,"NN+CC",_       ) = True
--        selectNgrams' (_,_      ,"PERSON"      ) = True
--        selectNgrams' (_,_      ,"ORGANIZATION") = True
--        selectNgrams' (_,_      ,"LOCATION"    ) = True
--        selectNgrams' (_,_      ,_       ) = False
--
--
--groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
--groupNgrams []       = []
--
----groupNgrams ((_,"DET",_):xs) = groupNgrams xs
--
---- "Groupe : nom commun et adjectifs avec conjonction"
--groupNgrams ((n,"NC",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (n1:n2:xs)
--    where
--        n1 = (n <> " " <> j1, "NC", n')
--        n2 = (n <> " " <> j2, "NC", n')
--
---- /!\ sometimes N instead of NC (why?)
--groupNgrams ((n,"N",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (n1:n2:xs)
--    where
--        n1 = (n <> " " <> j1, "N", n')
--        n2 = (n <> " " <> j2, "N", n')
--
---- Groupe : Adjectif + Conjonction de coordination + Adjectif
---- groupNgrams ((j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",j2'):xs) = groupNgrams ((j1 <> " " <> j2, "ADJ", j2'):xs)
--
---- Groupe : Nom commun + préposition + Nom commun
--groupNgrams ((n1,"NC",_):(p,"P",_):(n2,"NC",n2'):xs)  = groupNgrams ((n1 <> " " <> p <> " " <> n2, "NC", n2'):xs)
--groupNgrams ((n1,"NC",_):(p,"P",_):(n2,"NPP",n2'):xs) = groupNgrams ((n1 <> " " <> p <> " " <> n2, "NC", n2'):xs)
--groupNgrams ((n1,"NC",_):(prep,"P",_):(det,"DET",_):(n2,"NPP",n2'):xs) = groupNgrams ((n1 <> " " <> prep <> " " <> det <> " " <> n2, "NC", n2'):xs)
--
---- Groupe : Plusieurs adjectifs successifs
--groupNgrams ((x,"ADJ",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "ADJ", yy):xs)
--
---- Groupe : nom commun et adjectif
--groupNgrams ((x,"NC",_):(y,"ADJ",yy):xs)  = groupNgrams ((x <> " " <> y, "NC", yy):xs)
---- /!\ sometimes N instead of NC (why?)
--groupNgrams ((x,"N",_):(y,"ADJ",yy):xs)   = groupNgrams ((x <> " " <> y, "NC", yy):xs)
--
---- Groupe : adjectif et nom commun
--groupNgrams ((x,"ADJ",_):(y,"NC",yy):xs)  = groupNgrams ((x <> " " <> y, "NC", yy):xs)
---- /!\ sometimes N instead of NC (why?)
--groupNgrams ((x,"ADJ",_):(y,"N",yy):xs)   = groupNgrams ((x <> " " <> y, "NC", yy):xs)
--
--
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs)             = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
--
--
---- Si aucune des règles précédentes n'est remplie
--groupNgrams (x:xs)                        = (x:(groupNgrams xs))
--
--
--textTest :: [Text]
--textTest = [ "L'heure d'arrivée des coureurs dépend de la météo du jour."]
--
--- a/src/Gargantext/Text/Ngrams/PosTagging/Parser.hs
+++ b/src/Gargantext/Text/Ngrams/PosTagging/Parser.hs
-{-|
-Module      : Gargantext.Text.Ngrams.PosTagging.Parser
-Description : 
-Copyright   : (c) CNRS, 2017-Present
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-
-Here is a longer description of this module, containing some
-commentary with @some markup@.
-
-Ngrams selection algorithms
-A form is a list of characters seperated by one or more spaces in a sentence.
-A word is a form.
-
-}
-
-{-# LANGUAGE NoImplicitPrelude   #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-
-module Gargantext.Text.Ngrams.PosTagging.Parser
-  where
-
-import Data.Text hiding (map, group)
-
-import Gargantext.Prelude
-import Gargantext.Core (Lang(..))
-import Gargantext.Text.Ngrams.PosTagging.CoreNLP
-
-import qualified Gargantext.Text.Ngrams.PosTagging.Lang.En as En
-import qualified Gargantext.Text.Ngrams.PosTagging.Lang.Fr as Fr
-
-
-extractNgrams :: Lang -> Text -> IO [[NgramsTag]]
-extractNgrams lang s = map (group lang) <$> extractNgrams' lang s
-
-
-extractNgrams' :: Lang -> Text -> IO [[NgramsTag]]
-extractNgrams' lang t =  map tokens2ngramsTags
-                     <$> map _sentenceTokens
-                     <$> _sentences
-                     <$> corenlp lang t
-
-
---- | This function analyse and groups (or not) ngrams according to 
----   grammars specific of each language.
-group :: Lang -> [NgramsTag] -> [NgramsTag]
-group EN = En.group
-group FR = Fr.group
-
--- a/src/Gargantext/Text/Search.hs
+++ b/src/Gargantext/Text/Search.hs
@@ -28,8 +28,8 @@ import Data.Ix
 import Data.Text (Text)

 import Gargantext.Prelude
-import Gargantext.Text.Ngrams
-import Gargantext.Text.Ngrams.Stem as ST
+import Gargantext.Text.Terms.Mono (monoterms)
+import Gargantext.Text.Terms.Mono.Stem as ST
 import Gargantext.Text.Parsers.CSV

 type DocId = Int
@@ -52,14 +52,14 @@ docSearchConfig :: SearchConfig Doc DocId DocField NoFeatures
 docSearchConfig =
    SearchConfig {
      documentKey           = d_docId,
-      extractDocumentTerms  = extractTokens,
+      extractDocumentTerms  = extractTerms,
      transformQueryTerm    = normaliseQueryToken,
      documentFeatureValue  = const noFeatures
  }
  where
-    extractTokens :: Doc -> DocField -> [Text]
-    extractTokens doc TitleField       = monograms (d_title doc)
-    extractTokens doc AbstractField    = monograms (d_abstract doc)
+    extractTerms :: Doc -> DocField -> [Text]
+    extractTerms doc TitleField       = monoterms (d_title doc)
+    extractTerms doc AbstractField    = monoterms (d_abstract doc)

    normaliseQueryToken :: Text -> DocField -> Text
    normaliseQueryToken tok =

--- a/src/Gargantext/Text/Ngrams.hs
+++ b/src/Gargantext/Text/Ngrams.hs
@@ -18,56 +18,27 @@ Using Latin numerical prefixes, an n-gram of size 1 is referred to as a

 Source: https://en.wikipedia.org/wiki/Ngrams

-}
-
-{-# LANGUAGE NoImplicitPrelude #-}
-
-module Gargantext.Text.Ngrams
-  where
-
-import Data.Char (Char, isAlphaNum, isSpace)
-import Data.Text (Text, split, splitOn, pack, toLower)
-
-import Data.Set  (Set)
-import qualified Data.Set as S
-
-import Gargantext.Prelude
-import Gargantext.Core
-
-import Gargantext.Text.Ngrams.Stem (stem)
+TODO
+-- Prelude.concat <$> Prelude.map (filter (\n -> _my_token_pos n == Just NP)) <$> extractNgrams  Gargantext.Core.EN   testText_en

+group Ngrams -> Tree
+compute occ by node of Tree
+group occs according groups

-data Ngrams = Ngrams { _ngrams_label :: [Text]
-                     , _ngrams_stem  :: Set Text
-                     } deriving (Show)
-
-
-data Terms = MonoGrams | MultiGrams
-type MonoGrams  = Text
-type MultiGrams = [Text]
+compute cooccurrences
+compute graph

+-}

-ngrams :: Text -> [Text]
-ngrams = monograms
+{-# LANGUAGE NoImplicitPrelude #-}

-text2ngrams :: Lang -> Text -> Ngrams
-text2ngrams lang txt = Ngrams txt' (S.fromList $ map (stem lang) txt')
+module Gargantext.Text.Terms
  where
-    txt' = splitOn (pack " ") txt

+import Gargantext.Core.Types

-equivNgrams :: Ngrams -> Ngrams -> Bool
-equivNgrams (Ngrams _ s1) (Ngrams _ s2) = s1 `S.isSubsetOf` s2
-                                       || s2 `S.isSubsetOf` s1
-
--monograms :: Text -> [Text]
--monograms xs = monograms $ toLower $ filter isGram xs
-
-monograms :: Text -> [Text]
-monograms txt = map toLower $ split isWord txt
-  where
-    isWord c = c `elem` [' ', '\'', ',', ';']
-
-isGram :: Char -> Bool
-isGram  c  = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
+------------------------------------------------------------------------
+tokenTag2terms :: TokenTag -> Terms
+tokenTag2terms (TokenTag w t _ _) =  Terms w t
+------------------------------------------------------------------------

--- a/src/Gargantext/Text/Ngrams/Lists.hs
+++ b/src/Gargantext/Text/Ngrams/Lists.hs
@@ -13,7 +13,7 @@ commentary with @some markup@.

 {-# LANGUAGE NoImplicitPrelude #-}

-module Gargantext.Text.Ngrams.Lists
+module Gargantext.Text.Terms.Lists
  where

 --import Data.Maybe

--- a/src/Gargantext/Text/Terms/Mono.hs
+++ b/src/Gargantext/Text/Terms/Mono.hs
+{-|
+Module      : Gargantext.Text.Terms.Mono
+Description : Mono Terms module
+Copyright   : (c) CNRS, 2017 - present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+Mono-terms are Nterms where n == 1.
+
+-}
+
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Gargantext.Text.Terms.Mono
+  where
+
+import Data.Text (Text, toLower, split, splitOn, pack)
+import qualified Data.Set as S
+
+import Gargantext.Core
+import Gargantext.Core.Types
+import Gargantext.Text.Terms.Mono.Stem (stem)
+
+import Gargantext.Prelude
+import Data.Char (isAlphaNum, isSpace)
+
+monoterms :: Text -> [Term]
+monoterms txt = map toLower $ split isWord txt
+  where
+    isWord c = c `elem` [' ', '\'', ',', ';']
+
+
+text2terms :: Lang -> Text -> Terms
+text2terms lang txt = Terms label stems
+  where
+    label = splitOn (pack " ") txt
+    stems = S.fromList $ map (stem lang) label
+
+  --monograms :: Text -> [Text]
+--monograms xs = monograms $ toLower $ filter isGram xs
+
+isGram :: Char -> Bool
+isGram  c  = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
+
--- a/src/Gargantext/Text/Ngrams/Stem.hs
+++ b/src/Gargantext/Text/Ngrams/Stem.hs
@@ -18,7 +18,7 @@ Source : https://en.wikipedia.org/wiki/Stemming
 -}


-module Gargantext.Text.Ngrams.Stem (stem, Lang(..))
+module Gargantext.Text.Terms.Mono.Stem (stem, Lang(..))
  where

 import Data.Text (Text)

--- a/src/Gargantext/Text/Ngrams/Stem/En.hs
+++ b/src/Gargantext/Text/Ngrams/Stem/En.hs
@@ -17,7 +17,7 @@ Adapted from:

 {-# LANGUAGE NoImplicitPrelude #-}

-module Gargantext.Text.Ngrams.Stem.En (stem)
+module Gargantext.Text.Terms.Mono.Stem.En (stem)
  where

 import Control.Monad

--- a/src/Gargantext/Text/Ngrams/Token.hs
+++ b/src/Gargantext/Text/Ngrams/Token.hs
@@ -17,11 +17,11 @@ Source: https://en.wikipedia.org/wiki/Tokenize

 {-# LANGUAGE NoImplicitPrelude #-}

-module Gargantext.Text.Ngrams.Token (tokenize)
+module Gargantext.Text.Terms.Mono.Token (tokenize)
  where

 import Data.Text (Text)
-import qualified Gargantext.Text.Ngrams.Token.Text as En
+import qualified Gargantext.Text.Terms.Mono.Token.En as En

 type Token = Text


--- a/src/Gargantext/Text/Ngrams/Token/Text.hs
+++ b/src/Gargantext/Text/Ngrams/Token/Text.hs
@@ -7,13 +7,12 @@ Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

-Inspired from https://bitbucket.org/gchrupala/lingo/overview
+First inspired from https://bitbucket.org/gchrupala/lingo/overview
 -}

-
 {-# LANGUAGE OverloadedStrings #-}

-module Gargantext.Text.Ngrams.Token.Text
+module Gargantext.Text.Terms.Mono.Token.En
    ( EitherList(..)
    , Tokenizer
    , tokenize

--- a/src/Gargantext/Text/Terms/Multi.hs
+++ b/src/Gargantext/Text/Terms/Multi.hs
+{-|
+Module      : Gargantext.Text.Terms.Multi
+Description : Multi Terms module
+Copyright   : (c) CNRS, 2017 - present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+Multi-terms are ngrams where n > 1.
+
+-}
+
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Gargantext.Text.Terms.Multi (extractTokenTags)
+  where
+
+import Data.Text hiding (map, group)
+
+import Gargantext.Prelude
+import Gargantext.Core (Lang(..))
+import Gargantext.Core.Types
+
+import Gargantext.Text.Terms.Multi.PosTagging
+import qualified Gargantext.Text.Terms.Multi.Lang.En as En
+import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr
+
+
+extractTokenTags :: Lang -> Text -> IO [[TokenTag]]
+extractTokenTags lang s = map (group lang) <$> extractTokenTags' lang s
+
+
+extractTokenTags' :: Lang -> Text -> IO [[TokenTag]]
+extractTokenTags' lang t =  map tokens2tokensTags
+                     <$> map _sentenceTokens
+                     <$> _sentences
+                     <$> corenlp lang t
+
+---- | This function analyses and groups (or not) ngrams according to
+----   specific grammars of each language.
+group :: Lang -> [TokenTag] -> [TokenTag]
+group EN = En.group
+group FR = Fr.group
+
--- a/src/Gargantext/Text/Terms/Multi/Group.hs
+++ b/src/Gargantext/Text/Terms/Multi/Group.hs
+{-|
+Module      : Gargantext.Text.Terms.Multi.Group
+Description : English Grammar rules to group postag tokens.
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+Rule-based grammars are computed in this english module in order to
+group the tokens into extracted terms.
+
+-}
+
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Gargantext.Text.Terms.Multi.Group (group2)
+  where
+
+import Data.Maybe (Maybe(Just))
+
+import Gargantext.Core.Types
+import Gargantext.Prelude
+
+-- | FIXME p1 and p2 not really taken into account
+group2 :: POS -> POS -> [TokenTag] -> [TokenTag]
+group2 p1 p2 (x@(TokenTag _ _ (Just p1') _):y@(TokenTag _ _ (Just p2') _):z) =
+  if (p1 == p1') && (p2 == p2')
+     then group2 p1 p2 (x<>y : z)
+     else (x : group2 p1 p2 (y:z))
+group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
+group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
+group2 p1 p2 (x@(TokenTag _ _ (Just _) _):y@(TokenTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z))
+group2 _ _ [] = []
+
+
+
--- a/src/Gargantext/Text/Terms/Multi/Lang/En.hs
+++ b/src/Gargantext/Text/Terms/Multi/Lang/En.hs
+{-|
+Module      : Gargantext.Text.Terms.Multi.Lang.En
+Description : English Grammar rules to group postag tokens.
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+Rule-based grammars are computed in this english module in order to group
+the tokens into extracted terms.
+
+-}
+
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Gargantext.Text.Terms.Multi.Lang.En (group)
+  where
+
+import Gargantext.Prelude
+import Gargantext.Core.Types
+import Gargantext.Text.Terms.Multi.Group
+
+------------------------------------------------------------------------
+-- | Rule grammar to group tokens
+group :: [TokenTag] -> [TokenTag]
+group [] = []
+group ntags = group2 NP NP
+           $ group2 NP VB
+           $ group2 NP IN
+           $ group2 IN DT
+           $ group2 VB NP
+           $ group2 JJ NP
+           $ group2 JJ JJ
+           $ group2 JJ CC
+           $ ntags
+
+------------------------------------------------------------------------
+--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs)             = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
+--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
+--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs)         = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
+--
+--groupNgrams (x:xs)                                          = (x:(groupNgrams xs))
+
--- a/src/Gargantext/Text/Terms/Multi/Lang/Fr.hs
+++ b/src/Gargantext/Text/Terms/Multi/Lang/Fr.hs
+{-|
+Module      : Gargantext.Text.Terms.Multi.Lang.Fr
+Description : French Grammar rules to group postag tokens.
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+This @group@ function groups horizontally ngrams in their context of
+sentence according to grammars specific of each language. In english, JJ
+is ADJectiv in french.
+
+-}
+
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Gargantext.Text.Terms.Multi.Lang.Fr (group)
+  where
+
+import Gargantext.Prelude
+import Gargantext.Core.Types
+import Gargantext.Text.Terms.Multi.Group (group2)
+
+group :: [TokenTag] -> [TokenTag]
+group [] = []
+group ntags = group2 NP NP
+            $ group2 NP VB
+            $ group2 NP IN
+            $ group2 IN DT
+            $ group2 VB NP
+            $ group2 JJ NP
+            $ group2 NP JJ
+            $ group2 JJ JJ
+            $ group2 JJ CC
+            $ ntags
+
+------------------------------------------------------------------------
+-- TODO
+--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs)             = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
+--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
--- a/src/Gargantext/Text/Ngrams/PosTagging/CoreNLP.hs
+++ b/src/Gargantext/Text/Ngrams/PosTagging/CoreNLP.hs
 {-|
-Module      : Gargantext.Text.Ngrams.PosTagging.CoreNLP
-Description : CoreNLP module
+Module      : Gargantext.Text.Terms.Multi.PosTagging
+Description : PosTagging module using Stanford java REST API
 Copyright   : (c) CNRS, 2017
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

+In corpus linguistics, part-of-speech tagging (POS tagging or PoS
+tagging or POST), also called grammatical tagging or word-category
+disambiguation, is the process of marking up a word in a text (corpus)
+as corresponding to a particular part of speech,[1] based on both its
+definition and its context—i.e., its relationship with adjacent and
+related words in a phrase, sentence, or paragraph. A simplified form of
+this is commonly taught to school-age children, in the identification of
+words as nouns, verbs, adjectives, adverbs, etc.
+
+Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
 -}

 {-# LANGUAGE DataKinds         #-}
 {-# LANGUAGE DeriveGeneric     #-}
+{-# LANGUAGE FlexibleContexts  #-}
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE TemplateHaskell   #-}
 {-# LANGUAGE TypeOperators     #-}
-{-# LANGUAGE FlexibleContexts  #-}

-
-module Gargantext.Text.Ngrams.PosTagging.CoreNLP
+module Gargantext.Text.Terms.Multi.PosTagging
  where

 import GHC.Generics
@@ -26,14 +35,14 @@ import GHC.Show (Show(..))
 import Data.ByteString.Lazy.Internal (ByteString)
 import Data.Aeson.TH (deriveJSON)
 import Data.Aeson
-import Data.Monoid
 import Data.Maybe (isJust)

-import Data.Set (Set, fromList, empty)
+import Data.Set (fromList)

-import Data.Text (Text, splitOn, pack, toLower, unpack)
+import Data.Text (Text, splitOn, pack, toLower)

 import Gargantext.Core (Lang(..))
+import Gargantext.Core.Types
 import Gargantext.Core.Utils.Prefix (unPrefix)
 import Gargantext.Prelude

@@ -44,53 +53,6 @@ import Control.Monad.IO.Class  (MonadIO)
 import Data.String.Conversions (ConvertibleStrings)

 ------------------------------------------------------------------------
-data Tag = POS | NER
-  deriving (Show, Eq)
------------------------------------------------------------------------
------------------------------------------------------------------------
-data POS = NP
-         | JJ  | VB
-         | CC  | IN | DT
-         | NoPos
-  deriving (Show, Generic, Eq)
-
------------------------------------------------------------------------
-instance FromJSON POS where
-  parseJSON = withText "String" (\x -> pure (pos $ unpack x))
-    where
-      pos :: [Char] -> POS
-      pos "NP"  = NP
-      pos "NN"  = NP
-      pos "NC"  = NP
-      pos "NNS" = NP
-      pos "NNP" = NP
-      pos "JJ"  = JJ
-      pos "ADJ" = JJ
-      pos "VB"  = VB
-      pos "VBN" = VB
-      pos "VBG" = VB
-      pos "CC"  = CC
-      pos "IN"  = IN
-      pos "DT"  = DT
-      -- French specific
-      pos "P"  = IN
-      pos  _    = NoPos
-
-instance ToJSON POS
------------------------------------------------------------------------
-data NER = PERSON | ORGANIZATION | LOCATION | NoNER
-  deriving (Show, Generic)
------------------------------------------------------------------------
-instance FromJSON NER where
-  parseJSON = withText "String" (\x -> pure (ner $ unpack x))
-    where
-      ner :: [Char] -> NER
-      ner "PERSON"       = PERSON
-      ner "ORGANIZATION" = ORGANIZATION
-      ner "LOCATION"     = LOCATION
-      ner  _             = NoNER
-
-instance ToJSON NER
 ------------------------------------------------------------------------
 data Token = Token { _tokenIndex                :: Int
                   , _tokenWord                 :: Text
@@ -105,39 +67,21 @@ data Token = Token { _tokenIndex                :: Int
                   } deriving (Show, Generic)
 $(deriveJSON (unPrefix "_token") ''Token)
 ------------------------------------------------------------------------
-data NgramsTag  = NgramsTag { _my_token_word :: [Text]
-                            , _my_token_stem :: Set Text
-                            , _my_token_pos  :: Maybe POS
-                            , _my_token_ner  :: Maybe NER
-                            } deriving (Show)
 ------------------------------------------------------------------------
-tokens2ngramsTags :: [Token] -> [NgramsTag]
-tokens2ngramsTags ts = select $ map ngramsTag ts
+tokens2tokensTags :: [Token] -> [TokenTag]
+tokens2tokensTags ts = filter' $ map tokenTag ts
 ------------------------------------------------------------------------
-ngramsTag :: Token -> NgramsTag
-ngramsTag (Token _ _ w s _ _ p n _ _) = NgramsTag w' s' p n
+tokenTag :: Token -> TokenTag
+tokenTag (Token _ _ w s _ _ p n _ _) = TokenTag w' s' p n
  where
    w' = split w
    s' = fromList (split s)
    split = splitOn (pack " ") . toLower

-select :: [NgramsTag] -> [NgramsTag]
-select xs = filter isNgrams xs
+filter' :: [TokenTag] -> [TokenTag]
+filter' xs = filter isNgrams xs
    where
-      isNgrams (NgramsTag _ _ p n) = isJust p || isJust n
-
-instance Monoid NgramsTag where
-  mempty = NgramsTag [] empty Nothing Nothing
-
-  mappend (NgramsTag w1 s1 p1 n1) (NgramsTag w2 s2 p2 _)
-         = NgramsTag (w1 <> w2) (s1 <> s2) p3 n1
-          where
-            p3 = case (p1,p2) of
-                   (Just JJ, Just NP) -> Just NP
-                   (Just VB, Just NP) -> Just NP
-                   _                  -> p1
-
-  mconcat = foldl mappend mempty
+      isNgrams (TokenTag _ _ p n) = isJust p || isJust n

 ------------------------------------------------------------------------
 data Sentence  = Sentence { _sentenceIndex :: Int