-- Mono.hs 1.17 KB
{-|
Module      : Gargantext.Text.Terms.Mono
Description : Mono Terms module
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Mono-terms are Nterms where n == 1.

-}

{-# LANGUAGE NoImplicitPrelude #-}

module Gargantext.Text.Terms.Mono (monoterms, monoterms')
  where

import Data.Text (Text, toLower, split, splitOn, pack)
import qualified Data.Set as S

import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms.Mono.Stem (stem)

import Gargantext.Prelude
import Data.Char (isAlphaNum, isSpace)

-- | Cut a text into monoterms with 'monoterms' and convert every resulting
-- token into a 'Terms' value for the given language with 'text2terms'.
monoterms' :: Lang -> Text -> [Terms]
monoterms' lang input = [ text2terms lang token | token <- monoterms input ]

-- | Split a text into lower-cased monoterms.
--
-- The text is cut at every space, apostrophe, comma and semicolon. Empty
-- pieces, which 'split' yields for leading, trailing or adjacent separators
-- (for instance between the comma and the space of @"a, b"@), are dropped so
-- that no empty term is produced; an empty input gives an empty list.
monoterms :: Text -> [Text]
monoterms txt = [ toLower tok | tok <- split isSeparator txt, tok /= pack "" ]
  where
    -- Characters that mark a boundary between terms (never part of a term).
    isSeparator c = c `elem` [' ', '\'', ',', ';']

-- | Build a 'Terms' value from a text: the first field is the text split on
-- single spaces, the second is the set obtained by applying @stem lang@ to
-- each of those pieces.
text2terms :: Lang -> Text -> Terms
text2terms lang txt =
  let pieces  = splitOn (pack " ") txt
      stemSet = S.fromList [ stem lang piece | piece <- pieces ]
  in  Terms pieces stemSet

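-- Disabled draft: as written, monograms calls itself with no base case and
-- would never terminate.
--monograms :: Text -> [Text]
--monograms xs = monograms $ toLower $ filter isGram xs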

-- | Accept a character when it is alphanumeric, whitespace, or one of the
-- three extra symbols hyphen, slash and apostrophe.
isGram :: Char -> Bool
isGram ch = isAlphaNum ch || isSpace ch || isExtraSymbol
  where
    isExtraSymbol = ch `elem` ['-', '/', '\'']