Commit d8bdcdee authored by Alfredo Di Napoli

Tighten API exports of Gargantext.Core.Text.Terms.WithList

parent 97b483d7
......@@ -14,7 +14,16 @@ commentary with @some markup@.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ViewPatterns #-}
-- Earlier module header (the diff lists it before its replacement): with no
-- export list, every top-level binding of the module is exported.
module Gargantext.Core.Text.Terms.WithList where
-- Replacement header: an explicit export list, so only the names below are
-- visible to importers. `Pattern(..)` exports the type together with all of
-- its constructors; the other types are exported by name only.
module Gargantext.Core.Text.Terms.WithList (
termsInText
, buildPatterns
, Patterns
, Pattern(..)
, MatchedText
, buildPatternsWith
, extractTermsWithList
, extractTermsWithList'
) where
import Data.Algorithms.KMP qualified as KMP
import Data.IntMap.Strict qualified as IntMap
......@@ -68,19 +77,22 @@ replaceTerms rplaceTerms pats terms = go 0
if len2 < len1 then (len1, lab1) else (len2, lab2)
-- | Build the search 'Patterns' for a list of ngrams terms, tokenising each
-- term according to the language. Every term becomes a @(tokens, [])@ entry,
-- i.e. a label with an empty list of alternatives, and the entries are handed
-- to 'buildPatterns'.
buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns
-- Earlier form of the two equations (the diff lists it before its
-- replacement): the term's text is reached through the 'unNgramsTerm' accessor.
-- ZH: the text is cut into chunks of length 1 (one per character, assuming
-- `Text` is Data.Text -- TODO confirm against the imports).
buildPatternsWith ZH ts = buildPatterns $ map (\k -> (Text.chunksOf 1 $ unNgramsTerm k, [])) ts
-- Any other language: the text is split on single spaces.
buildPatternsWith _ ts = buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts
-- Replacement form: same tokenisation, but the lambda pattern-matches on the
-- 'NgramsTerm' constructor instead of calling the accessor.
buildPatternsWith ZH ts = buildPatterns $ map (\(NgramsTerm k) -> (Text.chunksOf 1 k, [])) ts
buildPatternsWith _ ts = buildPatterns $ map (\(NgramsTerm k) -> (Text.splitOn " " k, [])) ts
-- | Turn a 'TermList' into 'Patterns': every entry is expanded into its
-- patterns and the combined list is sorted on @Down . _pat_length@, i.e.
-- longest patterns first (assuming the usual ascending `sortWith`).
buildPatterns :: TermList -> Patterns
-- Earlier definition (the diff lists it before its replacement): the helper
-- receives each entry as a tuple.
buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
-- Replacement definition: the helper is curried, hence the 'uncurry'.
buildPatterns = sortWith (Down . _pat_length) . concatMap (uncurry buildPattern)
where
-- Earlier helper: the label is treated as one more alternative; empty strings
-- are removed from every alternative, then each token list goes through @f@.
buildPattern (label, alts) = map f $ map (\alt -> filter (/= "") alt) (label : alts)
where
-- ERR1 guards against an empty token. Empty strings were already filtered out
-- by the caller just above, so this guard should not fire from that call.
f alt | "" `elem` alt = errorTrace ("buildPatterns: ERR1" <> show(label))
-- ERR2: nothing is left after filtering. `errorTrace` is defined elsewhere;
-- presumably it aborts with the message -- TODO confirm.
| null alt = errorTrace "buildPatterns: ERR2"
| otherwise =
-- A pattern stores the result of KMP.build on the tokens, the number of
-- tokens, and the entry's label.
Pattern (KMP.build alt) (length alt) label
--(Terms label $ Set.empty) -- TODO check stems
-- Replacement helper, curried and with its own signature. The label is itself
-- treated as the first alternative; empty strings are dropped from every
-- alternative, and any alternative for which 'mkPattern' returns Nothing is
-- discarded by 'mapMaybe' instead of raising an error.
buildPattern :: Label -> [MultiTerm] -> [Pattern]
buildPattern label alts = mapMaybe (mkPattern label) $ map (\alt -> filter (/= "") alt) (label : alts)
-- Build the 'Pattern' for one token list under the given label. Returns
-- Nothing when the list contains an empty token or is itself empty; otherwise
-- returns the pattern wrapped in Just.
mkPattern :: Label -> [Text] -> Maybe Pattern
mkPattern label alt
| "" `elem` alt = Nothing
| null alt = Nothing
| otherwise = Just $
-- The pattern stores the result of KMP.build on the tokens, the number of
-- tokens, and the label.
Pattern (KMP.build alt) (length alt) label
--(Terms label $ Set.empty) -- TODO check stems
--------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment