{-|
Module      : Gargantext.Core.Text.Terms.Tokenize
Description : String tokenization
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}


module Gargantext.Core.Text.Terms.Tokenize
where

import Control.Lens (view)
import Control.Monad.Trans.Control (MonadBaseControl)
import Gargantext.Core (Lang)
import Gargantext.Core.NLP (nlpServerGet, HasNLPServer)
import Gargantext.Core.Text.Terms.Multi (tokenTags)
import Gargantext.Core.Types (TokenTag(..))
import Gargantext.Prelude


-- | Tokenize a text using the NLP server registered for the given language.
--
-- The server is read from the reader environment through 'nlpServerGet',
-- then 'tokenTags' is run in the base 'IO' monad and its nested result is
-- flattened into a single list of 'TokenTag's.
tokenize
  :: ( HasNLPServer env
     , MonadReader env m
     , MonadBaseControl IO m
     )
  => Lang  -- ^ language used both to pick the server and to tag the text
  -> Text  -- ^ raw text to tokenize
  -> m [TokenTag]
tokenize lang txt =
  view (nlpServerGet lang) >>= \server ->
    -- 'tokenTags' yields a list of lists; 'concat' merges them in order.
    liftBase (fmap concat (tokenTags server lang txt))
