Commit bd3c38fd authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Rename ExtractNgramsT -> ExtractNgrams

The final `T` doesn't add anything. It also moves the `HasText`
constraint _outside_ the typeclass definition.
parent f5f0ae1b
......@@ -116,13 +116,12 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance Hashable ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class ExtractNgramsT h
where
extractNgramsT :: HasText h
=> NLPServerConfig
-> TermType Lang
-> h
-> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
class HasText h => ExtractNgrams h where
extractNgrams :: NLPServerConfig
-> TermType Lang
-> h
-> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms { .. }) =
......
......@@ -444,7 +444,7 @@ insertMasterDocs cfg nlpServer c lang hs = do
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT nlpServer $ withLang lang documentsWithId)
(extractNgrams nlpServer $ withLang lang documentsWithId)
(map (B.first contextId2NodeId) documentsWithId)
runDBTx $ do
......
......@@ -25,7 +25,7 @@ import Gargantext.Core (Lang, NLPServerConfig(server))
import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Text.Corpus.Parsers (splitOn)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang)
import Gargantext.Core.Text.Terms (ExtractNgrams(..), ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractTerms, tt_lang)
import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source )
......@@ -39,59 +39,58 @@ import Gargantext.Prelude
------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact
where
extractNgramsT _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where
extract :: TermType Lang -> HyperdataContact
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extract _l hc' = do
let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc'
instance ExtractNgrams HyperdataContact where
extractNgrams _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where
extract :: TermType Lang
-> HyperdataContact
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extract _l hc' = do
let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance ExtractNgramsT HyperdataDocument
where
extractNgramsT :: NLPServerConfig
-> TermType Lang
-> HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
where
extractNgramsT' :: HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extractNgramsT' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ doc ^. hd_source
institutes = map text2ngrams
$ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ doc ^. hd_institutes
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ doc ^. hd_authors
termsWithCounts' :: [(NgramsPostag, TermsCount)] <-
map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$>
liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
$ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ]
<> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
<> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
instance ExtractNgrams HyperdataDocument where
extractNgrams :: NLPServerConfig
-> TermType Lang
-> HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extractNgrams ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
where
extractNgramsT' :: HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extractNgramsT' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ doc ^. hd_source
institutes = map text2ngrams
$ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ doc ^. hd_institutes
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ doc ^. hd_authors
termsWithCounts' :: [(NgramsPostag, TermsCount)] <-
map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$>
liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
$ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ]
<> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
<> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgrams a, HasText a) => ExtractNgrams (Node a)
where
extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h
extractNgrams ncs l (Node { _node_hyperdata = h }) = extractNgrams ncs l h
instance HasText a => HasText (Node a)
......
......@@ -25,7 +25,7 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text ( HasText )
import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms ( ExtractNgramsT )
import Gargantext.Core.Text.Terms ( ExtractNgrams )
import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
......@@ -52,7 +52,7 @@ type FlowCmdM env err m =
type FlowCorpus a = ( UniqParameters a
, InsertDb a
, ExtractNgramsT a
, ExtractNgrams a
, HasText a
, ToNode a
, ToJSON a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment