Commit 00dc93a0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TextFlow REFACT] addin ExtractedNgrams type for simple or enriched ngrams extraction

parent 8f0fcd75
...@@ -12,11 +12,12 @@ Portability : POSIX ...@@ -12,11 +12,12 @@ Portability : POSIX
module Gargantext.Core module Gargantext.Core
where where
import Gargantext.Prelude
import GHC.Generics (Generic)
import Data.Aeson import Data.Aeson
import Data.Either(Either(Left)) import Data.Either(Either(Left))
import Data.Hashable (Hashable)
import Data.Swagger import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Prelude
import Servant.API import Servant.API
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -47,6 +48,8 @@ instance FromHttpApiData Lang ...@@ -47,6 +48,8 @@ instance FromHttpApiData Lang
parseUrlPiece "FR" = pure FR parseUrlPiece "FR" = pure FR
parseUrlPiece "All" = pure All parseUrlPiece "All" = pure All
parseUrlPiece _ = Left "Unexpected value of OrderBy" parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance Hashable Lang
allLangs :: [Lang] allLangs :: [Lang]
allLangs = [minBound ..] allLangs = [minBound ..]
...@@ -64,10 +67,11 @@ instance HasDBid Lang where ...@@ -64,10 +67,11 @@ instance HasDBid Lang where
fromDBid 2 = EN fromDBid 2 = EN
fromDBid _ = panic "HasDBid lang, not implemented" fromDBid _ = panic "HasDBid lang, not implemented"
------------------------------------------------------------------------ ------------------------------------------------------------------------
data PostTagAlgo = CoreNLP data PostTagAlgo = CoreNLP
deriving (Show, Read) deriving (Show, Read, Eq, Ord, Generic)
instance Hashable PostTagAlgo
instance HasDBid PostTagAlgo where instance HasDBid PostTagAlgo where
toDBid CoreNLP = 1 toDBid CoreNLP = 1
......
...@@ -36,6 +36,7 @@ module Gargantext.Core.Text.Terms ...@@ -36,6 +36,7 @@ module Gargantext.Core.Text.Terms
import Control.Lens import Control.Lens
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.Traversable import Data.Traversable
...@@ -45,7 +46,6 @@ import qualified Data.List as List ...@@ -45,7 +46,6 @@ import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
...@@ -57,9 +57,11 @@ import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize) ...@@ -57,9 +57,11 @@ import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Core.Text.Terms.Multi (multiterms) import Gargantext.Core.Text.Terms.Multi (multiterms)
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams) import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgramsPostag)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams, NgramsId)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
data TermType lang data TermType lang
= Mono { _tt_lang :: !lang } = Mono { _tt_lang :: !lang }
...@@ -111,26 +113,44 @@ withLang (Unsupervised l n s m) ns = Unsupervised l n s m' ...@@ -111,26 +113,44 @@ withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
withLang l _ = l withLang l _ = l
------------------------------------------------------------------------ ------------------------------------------------------------------------
data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
| EnrichedNgrams { unEnrichedNgrams :: NgramsPostag }
deriving (Eq, Ord, Generic)
instance Hashable ExtractedNgrams
class ExtractNgramsT h class ExtractNgramsT h
where where
extractNgramsT :: HasText h extractNgramsT :: HasText h
=> TermType Lang => TermType Lang
-> h -> h
-> Cmd err (HashMap Ngrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
------------------------------------------------------------------------
cleanExtractedNgrams :: Int -> ExtractedNgrams -> ExtractedNgrams
cleanExtractedNgrams s (SimpleNgrams ng)
| Text.length (ng ^. ngramsTerms) < s = SimpleNgrams ng
| otherwise = SimpleNgrams $ text2ngrams (Text.take s (ng ^. ngramsTerms))
cleanExtractedNgrams s _ = undefined
extracted2ngrams :: ExtractedNgrams -> Ngrams
extracted2ngrams (SimpleNgrams ng) = ng
extracted2ngrams _ = undefined
filterNgrams :: Int -> HashMap Ngrams (Map NgramsType Int) isSimpleNgrams :: ExtractedNgrams -> Bool
-> HashMap Ngrams (Map NgramsType Int) isSimpleNgrams (SimpleNgrams _) = True
filterNgrams s = HashMap.mapKeys filter isSimpleNgrams _ = False
where
filter ng
| Text.length (ng ^. ngramsTerms) < s = ng
| otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms))
insertExtractedNgrams :: [ ExtractedNgrams ] -> Cmd err (HashMap Text NgramsId)
insertExtractedNgrams ngs = do
let (s, e) = List.partition isSimpleNgrams ngs
m1 <- insertNgrams (map unSimpleNgrams s)
m2 <- insertNgramsPostag (map unEnrichedNgrams e)
pure $ m1 <> m2
-- =======================================================
------------------------------------------------------------------------
-- | Terms from Text -- | Terms from Text
-- Mono : mono terms -- Mono : mono terms
-- Multi : multi terms -- Multi : multi terms
...@@ -147,15 +167,6 @@ terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s ( ...@@ -147,15 +167,6 @@ terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (
------------------------------------------------------------------------ ------------------------------------------------------------------------
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;." :: String)
-- | Unsupervised ngrams extraction -- | Unsupervised ngrams extraction
-- language agnostic extraction -- language agnostic extraction
-- TODO: remove IO -- TODO: remove IO
...@@ -175,6 +186,8 @@ termsUnsupervised (Unsupervised l n s m) = ...@@ -175,6 +186,8 @@ termsUnsupervised (Unsupervised l n s m) =
. uniText . uniText
termsUnsupervised _ = undefined termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token () newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t) newTries n t = buildTries n (fmap toToken $ uniText t)
...@@ -185,3 +198,12 @@ uniText = map (List.filter (not . isPunctuation)) ...@@ -185,3 +198,12 @@ uniText = map (List.filter (not . isPunctuation))
. sentences -- TODO get sentences according to lang . sentences -- TODO get sentences according to lang
. Text.toLower . Text.toLower
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;." :: String)
...@@ -25,25 +25,19 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging ...@@ -25,25 +25,19 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
module Gargantext.Core.Text.Terms.Multi.PosTagging module Gargantext.Core.Text.Terms.Multi.PosTagging
where where
import GHC.Generics
import Data.ByteString.Lazy.Internal (ByteString)
import Data.Aeson.TH (deriveJSON)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.Set (fromList) import Data.Set (fromList)
import Data.String.Conversions (ConvertibleStrings)
import Data.Text (Text, splitOn, pack, toLower) import Data.Text (Text, splitOn, pack, toLower)
import GHC.Generics
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude import Gargantext.Prelude
import Network.HTTP.Simple import Network.HTTP.Simple
import Data.String.Conversions (ConvertibleStrings)
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Token = Token { _tokenIndex :: Int data Token = Token { _tokenIndex :: Int
......
...@@ -31,6 +31,7 @@ import Control.Lens (Prism', (#), makeLenses, over) ...@@ -31,6 +31,7 @@ import Control.Lens (Prism', (#), makeLenses, over)
import Control.Monad.Except (MonadError(throwError)) import Control.Monad.Except (MonadError(throwError))
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Hashable (Hashable)
import Data.Maybe import Data.Maybe
import Data.Monoid import Data.Monoid
import Data.Semigroup import Data.Semigroup
...@@ -40,11 +41,11 @@ import Data.Swagger (ToSchema(..)) ...@@ -40,11 +41,11 @@ import Data.Swagger (ToSchema(..))
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import Data.Validity import Data.Validity
import GHC.Generics import GHC.Generics
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Utils.Prefix (unPrefix, wellNamedSchema) import Gargantext.Core.Utils.Prefix (unPrefix, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Ordering = Down | Up data Ordering = Down | Up
...@@ -74,7 +75,7 @@ data POS = NP ...@@ -74,7 +75,7 @@ data POS = NP
| JJ | VB | JJ | VB
| CC | IN | DT | CC | IN | DT
| NoPos | NoPos
deriving (Show, Generic, Eq) deriving (Show, Generic, Eq, Ord)
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance FromJSON POS where instance FromJSON POS where
parseJSON = withText "String" (\x -> pure (pos $ unpack x)) parseJSON = withText "String" (\x -> pure (pos $ unpack x))
...@@ -94,10 +95,11 @@ instance FromJSON POS where ...@@ -94,10 +95,11 @@ instance FromJSON POS where
pos "IN" = IN pos "IN" = IN
pos "DT" = DT pos "DT" = DT
-- French specific -- French specific
pos "P" = IN pos "P" = IN
pos _ = NoPos pos _ = NoPos
instance ToJSON POS instance ToJSON POS
instance Hashable POS
------------------------------------------------------------------------ ------------------------------------------------------------------------
data NER = PERSON | ORGANIZATION | LOCATION | NoNER data NER = PERSON | ORGANIZATION | LOCATION | NoNER
deriving (Show, Generic) deriving (Show, Generic)
......
...@@ -263,13 +263,15 @@ insertMasterDocs c lang hs = do ...@@ -263,13 +263,15 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring -- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int)) -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs :: HashMap Ngrams (Map NgramsType (Map NodeId Int)) mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
<- mapNodeIdNgrams <- mapNodeIdNgrams
<$> documentIdWithNgrams <$> documentIdWithNgrams
(extractNgramsT $ withLang lang documentsWithId) (extractNgramsT $ withLang lang documentsWithId)
documentsWithId documentsWithId
terms2id <- insertNgrams $ HashMap.keys mapNgramsDocs terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
-- to be removed -- to be removed
let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
...@@ -372,28 +374,28 @@ mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap ...@@ -372,28 +374,28 @@ mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact instance ExtractNgramsT HyperdataContact
where where
extractNgramsT l hc = filterNgrams 255 <$> extract l hc extractNgramsT l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where where
extract :: TermType Lang -> HyperdataContact extract :: TermType Lang -> HyperdataContact
-> Cmd err (HashMap Ngrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extract _l hc' = do extract _l hc' = do
let authors = map text2ngrams let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a]) $ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc' $ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ] pure $ HashMap.fromList $ [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
instance ExtractNgramsT HyperdataDocument instance ExtractNgramsT HyperdataDocument
where where
extractNgramsT :: TermType Lang extractNgramsT :: TermType Lang
-> HyperdataDocument -> HyperdataDocument
-> Cmd err (HashMap Ngrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT lang hd = filterNgrams 255 <$> extractNgramsT' lang hd extractNgramsT lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' lang hd
where where
extractNgramsT' :: TermType Lang extractNgramsT' :: TermType Lang
-> HyperdataDocument -> HyperdataDocument
-> Cmd err (HashMap Ngrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT' lang' doc = do extractNgramsT' lang' doc = do
let source = text2ngrams let source = text2ngrams
$ maybe "Nothing" identity $ maybe "Nothing" identity
...@@ -412,10 +414,10 @@ instance ExtractNgramsT HyperdataDocument ...@@ -412,10 +414,10 @@ instance ExtractNgramsT HyperdataDocument
<$> concat <$> concat
<$> liftBase (extractTerms lang' $ hasText doc) <$> liftBase (extractTerms lang' $ hasText doc)
pure $ HashMap.fromList $ [(source, Map.singleton Sources 1)] pure $ HashMap.fromList $ [(SimpleNgrams source, Map.singleton Sources 1)]
<> [(i', Map.singleton Institutes 1) | i' <- institutes ] <> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(a', Map.singleton Authors 1) | a' <- authors ] <> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ] <> [(SimpleNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a) instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where where
......
...@@ -22,10 +22,12 @@ module Gargantext.Database.Query.Table.Ngrams ...@@ -22,10 +22,12 @@ module Gargantext.Database.Query.Table.Ngrams
where where
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.HashMap.Strict (HashMap)
import Data.ByteString.Internal (ByteString) import Data.ByteString.Internal (ByteString)
import Data.Map (Map, fromList) import Data.Map (Map, fromList)
import Data.Text (Text) import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
import qualified Data.HashMap.Strict as HashMap
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, Cmd) import Gargantext.Database.Prelude (runOpaQuery, Cmd)
...@@ -64,8 +66,8 @@ _dbGetNgramsDb = runOpaQuery queryNgramsTable ...@@ -64,8 +66,8 @@ _dbGetNgramsDb = runOpaQuery queryNgramsTable
-- TODO-ACCESS: access must not be checked here but when insertNgrams is called. -- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
insertNgrams :: [Ngrams] -> Cmd err (Map Text NgramsId) insertNgrams :: [Ngrams] -> Cmd err (HashMap Text NgramsId)
insertNgrams ns = fromList <$> map (\(Indexed i t) -> (t, i)) <$> (insertNgrams' ns) insertNgrams ns = HashMap.fromList <$> map (\(Indexed i t) -> (t, i)) <$> (insertNgrams' ns)
-- TODO-ACCESS: access must not be checked here but when insertNgrams' is called. -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
insertNgrams' :: [Ngrams] -> Cmd err [Indexed Int Text] insertNgrams' :: [Ngrams] -> Cmd err [Indexed Int Text]
......
...@@ -16,16 +16,30 @@ Portability : POSIX ...@@ -16,16 +16,30 @@ Portability : POSIX
module Gargantext.Database.Query.Table.NgramsPostag module Gargantext.Database.Query.Table.NgramsPostag
where where
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Prelude import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
data NgramsPostag = NgramsPostag { _np_lang :: Lang
, _np_algo :: PostTagAlgo
, _np_postag :: POS
, _np_form :: Ngrams
, _np_lem :: Ngrams
}
deriving (Eq, Ord, Generic)
instance Hashable NgramsPostag
type NgramsPostagInsert = ( Int type NgramsPostagInsert = ( Int
, Int , Int
...@@ -36,9 +50,24 @@ type NgramsPostagInsert = ( Int ...@@ -36,9 +50,24 @@ type NgramsPostagInsert = ( Int
, Int , Int
) )
toInsert :: NgramsPostag -> NgramsPostagInsert
insertNgramsPostag :: [NgramsPostagInsert] -> Cmd err [Indexed Int Text] toInsert (NgramsPostag l a p form lem) =
insertNgramsPostag ns = runPGSQuery queryInsertNgramsPostag (PGS.Only $ Values fields ns) ( toDBid l
, toDBid a
, cs $ show p
, _ngramsTerms form
, _ngramsSize form
, _ngramsTerms lem
, _ngramsSize lem
)
insertNgramsPostag :: [NgramsPostag] -> Cmd err (HashMap Text NgramsId)
insertNgramsPostag ns = HashMap.fromList
<$> map (\(Indexed t i) -> (t,i))
<$> insertNgramsPostag' (map toInsert ns)
insertNgramsPostag' :: [NgramsPostagInsert] -> Cmd err [Indexed Text Int]
insertNgramsPostag' ns = runPGSQuery queryInsertNgramsPostag (PGS.Only $ Values fields ns)
where where
fields = map (\t -> QualifiedIdentifier Nothing t) $ snd fields_name fields = map (\t -> QualifiedIdentifier Nothing t) $ snd fields_name
......
...@@ -19,6 +19,7 @@ Ngrams connection to the Database. ...@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams module Gargantext.Database.Schema.Ngrams
where where
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable) import Data.Hashable (Hashable)
import Codec.Serialise (Serialise()) import Codec.Serialise (Serialise())
import Control.Lens (over) import Control.Lens (over)
...@@ -35,6 +36,7 @@ import Text.Read (read) ...@@ -35,6 +36,7 @@ import Text.Read (read)
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
import qualified Data.HashMap.Strict as HashMap
type NgramsId = Int type NgramsId = Int
...@@ -178,19 +180,19 @@ instance Functor NgramsT where ...@@ -178,19 +180,19 @@ instance Functor NgramsT where
fmap = over ngramsT fmap = over ngramsT
----------------------------------------------------------------------- -----------------------------------------------------------------------
withMap :: Map Text NgramsId -> Text -> NgramsId withMap :: HashMap Text NgramsId -> Text -> NgramsId
withMap m n = maybe (panic "withMap: should not happen") identity (lookup n m) withMap m n = maybe (panic "withMap: should not happen") identity (HashMap.lookup n m)
indexNgramsT :: Map Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams) indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
indexNgramsT = fmap . indexNgramsWith . withMap indexNgramsT = fmap . indexNgramsWith . withMap
-- | TODO replace NgramsT whith Typed NgramsType Ngrams -- | TODO replace NgramsT whith Typed NgramsType Ngrams
indexTypedNgrams :: Map Text NgramsId indexTypedNgrams :: HashMap Text NgramsId
-> Typed NgramsType Ngrams -> Typed NgramsType Ngrams
-> Typed NgramsType (Indexed Int Ngrams) -> Typed NgramsType (Indexed Int Ngrams)
indexTypedNgrams = fmap . indexNgramsWith . withMap indexTypedNgrams = fmap . indexNgramsWith . withMap
indexNgrams :: Map Text NgramsId -> Ngrams -> Indexed Int Ngrams indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
indexNgrams = indexNgramsWith . withMap indexNgrams = indexNgramsWith . withMap
indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
......
...@@ -49,7 +49,7 @@ data PosTag = PosTag { unPosTag :: Text } ...@@ -49,7 +49,7 @@ data PosTag = PosTag { unPosTag :: Text }
| NER { unNER :: Text } -- TODO | NER { unNER :: Text } -- TODO
------------------------------------------------------------------------ ------------------------------------------------------------------------
type NgramsPostag = NgramsPostagPoly (Maybe Int) Lang PostTagAlgo (Maybe PosTag) NgramsTerm NgramsTerm (Maybe Int) -- type NgramsPostag = NgramsPostagPoly (Maybe Int) Lang PostTagAlgo (Maybe PosTag) NgramsTerm NgramsTerm (Maybe Int)
type NgramsPostagDB = NgramsPostagPoly (Maybe Int) Int Int (Maybe Text) Int Int Int type NgramsPostagDB = NgramsPostagPoly (Maybe Int) Int Int (Maybe Text) Int Int Int
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment