Commit a8b8b4fc authored by Alfredo Di Napoli

Refactor termsInText

parent 04e82dd6
...@@ -12,6 +12,7 @@ commentary with @some markup@. ...@@ -12,6 +12,7 @@ commentary with @some markup@.
-} -}
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Terms.WithList where module Gargantext.Core.Text.Terms.WithList where
...@@ -20,7 +21,7 @@ import Data.Ord ...@@ -20,7 +21,7 @@ import Data.Ord
import Data.Text (Text, concat, unwords) import Data.Text (Text, concat, unwords)
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(ZH), defaultLanguage) import Gargantext.Core (Lang(ZH))
import Gargantext.Core.Text.Context import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence) import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
import Gargantext.Core.Types (TermsCount) import Gargantext.Core.Types (TermsCount)
...@@ -86,12 +87,19 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern ...@@ -86,12 +87,19 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
-------------------------------------------------------------------------- --------------------------------------------------------------------------
-- Utils -- Utils
type MatchedText = Text type MatchedText = Text
termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)] termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)]
termsInText ZH pats txt = termsInText defaultLanguage pats (addSpaces txt) termsInText lang pats (manipulateText lang -> txt) =
termsInText _ pats txt = groupWithCounts groupWithCounts $ List.concat
$ List.concat $ map (map unwords)
$ map (map unwords) $ extractTermsWithList pats txt
$ extractTermsWithList pats txt
-- | Pre-processes the input 'Text' on behalf of 'termsInText'.
-- Chinese (ZH) text is run through 'addSpaces'; text in any other
-- language is handed back unchanged.
manipulateText :: Lang -> Text -> Text
manipulateText ZH input = addSpaces input
manipulateText _  input = input
-------------------------------------------------------------------------- --------------------------------------------------------------------------
extractTermsWithList :: Patterns -> Text -> Corpus [Text] extractTermsWithList :: Patterns -> Text -> Corpus [Text]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment