Commit 3870ee90 authored by Nicolas Pouillard

Improve text2ngrams to strip spaces

parent 5392065c
Pipeline #1129 canceled with stage
......@@ -55,7 +55,7 @@ import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Core.Text.Terms.Multi (multiterms)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..))
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams)
import Gargantext.Prelude
......@@ -118,11 +118,11 @@ class ExtractNgramsT h
-- | Cap the text of every ngram key in the map at @s@ characters.
-- Keys whose text is strictly shorter than @s@ are kept untouched; the
-- others are rebuilt from the first @s@ characters of their text. The
-- values (counts per 'NgramsType') are carried over unchanged.
--
-- NOTE(review): truncation can make two distinct keys equal. Assuming
-- @Map@ is "Data.Map", 'Map.fromList' then keeps a single entry for that
-- key -- confirm that dropping the other entry's counts is intended.
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
-> Map Ngrams (Map NgramsType Int)
-- Definition before this commit (the +/- diff markers were lost in this
-- view): the limit was passed explicitly to the helper.
filterNgramsT s ms = Map.fromList $ map (\a -> filter' s a) $ Map.toList ms
-- Definition after this commit: the helper closes over @s@ instead.
filterNgramsT s ms = Map.fromList $ map filter' $ Map.toList ms
where
-- Helper before this commit: truncated the text but reused the original
-- size field @n@, and did not strip the truncated text.
filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
True -> (ng,y)
False -> (Ngrams (Text.take s' t) n , y)
-- Helper after this commit: the truncated text goes through 'text2ngrams',
-- so the key is rebuilt by that function rather than by the raw constructor.
filter' (ng,y)
| Text.length (ng ^. ngramsTerms) < s = (ng,y)
| otherwise = (text2ngrams (Text.take s (ng ^. ngramsTerms)), y)
-- =======================================================
......
......@@ -25,7 +25,7 @@ import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.Map (Map, fromList, lookup)
import Data.Text (Text, splitOn, pack)
import Data.Text (Text, splitOn, pack, strip)
import Gargantext.Core.Types (TODO(..))
import Gargantext.Prelude
import Prelude (Functor)
......@@ -140,16 +140,18 @@ fromNgramsTypeId id = lookup id
------------------------------------------------------------------------
-- | An ngram: its text ('_ngramsTerms') together with a size field
-- ('_ngramsSize'). 'text2ngrams' fills the size with the number of
-- space-separated pieces of the text.
--
-- TODO put it in Gargantext.Core.Text.Ngrams
--
-- The declaration appears twice because this is a diff view without +/-
-- markers: the first form (constructor @Ngrams@) is the one being replaced,
-- the second (constructor @UnsafeNgrams@) is the replacement.
-- NOTE(review): the @Unsafe@ prefix presumably signals that building a value
-- directly bypasses the normalisation done by 'text2ngrams' -- confirm.
data Ngrams = Ngrams { _ngramsTerms :: Text
, _ngramsSize :: Int
} deriving (Generic, Show, Eq, Ord)
data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
, _ngramsSize :: Int
} deriving (Generic, Show, Eq, Ord)
-- Derives the lenses for the underscore-prefixed fields above
-- (e.g. @ngramsTerms@, which 'filterNgramsT' uses via @^.@).
makeLenses ''Ngrams
-- Serialises an ngram as a two-field row: the terms text first, then the
-- size. The two 'toRow' equations are the before/after sides of the diff;
-- they differ only in the constructor name being matched.
instance PGS.ToRow Ngrams where
toRow (Ngrams t s) = [toField t, toField s]
toRow (UnsafeNgrams t s) = [toField t, toField s]
-- | Build an 'Ngrams' value from raw text, setting the size field to the
-- number of pieces obtained by splitting the text on a single space.
--
-- NOTE(review): because the split is on exactly @" "@, consecutive inner
-- spaces presumably yield empty pieces that are counted too, and empty text
-- presumably gets size 1 rather than 0 -- confirm against 'splitOn'.
text2ngrams :: Text -> Ngrams
-- Equation before this commit: the text was stored and counted as given.
text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
-- Equation after this commit: the text is stripped first, and the stripped
-- text is both stored and counted.
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
where
-- Text with leading and trailing whitespace removed.
txt' = strip txt
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment