Commit bd3c38fd authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Rename ExtractNgramsT -> ExtractNgrams

The final `T` doesn't add anything. It also moves the `HasText`
constraint _outside_ the typeclass definition.
parent f5f0ae1b
...@@ -116,13 +116,12 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams } ...@@ -116,13 +116,12 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance Hashable ExtractedNgrams instance Hashable ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity. -- | A typeclass that represents extracting ngrams from an entity.
class ExtractNgramsT h class HasText h => ExtractNgrams h where
where extractNgrams :: NLPServerConfig
extractNgramsT :: HasText h -> TermType Lang
=> NLPServerConfig -> h
-> TermType Lang -> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
-> h
-> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
------------------------------------------------------------------------ ------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms { .. }) = enrichedTerms l pa po (Terms { .. }) =
......
...@@ -444,7 +444,7 @@ insertMasterDocs cfg nlpServer c lang hs = do ...@@ -444,7 +444,7 @@ insertMasterDocs cfg nlpServer c lang hs = do
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams <- mapNodeIdNgrams
<$> documentIdWithNgrams <$> documentIdWithNgrams
(extractNgramsT nlpServer $ withLang lang documentsWithId) (extractNgrams nlpServer $ withLang lang documentsWithId)
(map (B.first contextId2NodeId) documentsWithId) (map (B.first contextId2NodeId) documentsWithId)
runDBTx $ do runDBTx $ do
......
...@@ -25,7 +25,7 @@ import Gargantext.Core (Lang, NLPServerConfig(server)) ...@@ -25,7 +25,7 @@ import Gargantext.Core (Lang, NLPServerConfig(server))
import Gargantext.Core.Text (HasText(..)) import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Text.Corpus.Parsers (splitOn) import Gargantext.Core.Text.Corpus.Parsers (splitOn)
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang) import Gargantext.Core.Text.Terms (ExtractNgrams(..), ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractTerms, tt_lang)
import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight) import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source )
...@@ -39,59 +39,58 @@ import Gargantext.Prelude ...@@ -39,59 +39,58 @@ import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact instance ExtractNgrams HyperdataContact where
where extractNgrams _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
extractNgramsT _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc where
where extract :: TermType Lang
extract :: TermType Lang -> HyperdataContact -> HyperdataContact
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extract _l hc' = do extract _l hc' = do
let authors = map text2ngrams let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a]) $ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc' $ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
-- | Main ngrams extraction functionality. -- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood. -- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ". -- For Sources, Institutes, Authors, this uses simple split on " ".
instance ExtractNgramsT HyperdataDocument instance ExtractNgrams HyperdataDocument where
where extractNgrams :: NLPServerConfig
extractNgramsT :: NLPServerConfig -> TermType Lang
-> TermType Lang -> HyperdataDocument
-> HyperdataDocument -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) extractNgrams ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd where
where extractNgramsT' :: HyperdataDocument
extractNgramsT' :: HyperdataDocument -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) extractNgramsT' doc = do
extractNgramsT' doc = do let source = text2ngrams
let source = text2ngrams $ maybe "Nothing" identity
$ maybe "Nothing" identity $ doc ^. hd_source
$ doc ^. hd_source
institutes = map text2ngrams
institutes = map text2ngrams $ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd)) $ doc ^. hd_institutes
$ doc ^. hd_institutes
authors = map text2ngrams
authors = map text2ngrams $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd)) $ doc ^. hd_authors
$ doc ^. hd_authors
termsWithCounts' :: [(NgramsPostag, TermsCount)] <-
termsWithCounts' :: [(NgramsPostag, TermsCount)] <- map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$>
map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$> liftBase (extractTerms ncs lang $ hasText doc)
liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
pure $ HashMap.fromList $ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ]
$ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ] <> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ] <> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
<> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] <> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
<> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgrams a, HasText a) => ExtractNgrams (Node a)
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where where
extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h extractNgrams ncs l (Node { _node_hyperdata = h }) = extractNgrams ncs l h
instance HasText a => HasText (Node a) instance HasText a => HasText (Node a)
......
...@@ -25,7 +25,7 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory ) ...@@ -25,7 +25,7 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text ( HasText ) import Gargantext.Core.Text ( HasText )
import Gargantext.API.Admin.Orchestrator.Types qualified as API import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms ( ExtractNgramsT ) import Gargantext.Core.Text.Terms ( ExtractNgrams )
import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight) import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
...@@ -52,7 +52,7 @@ type FlowCmdM env err m = ...@@ -52,7 +52,7 @@ type FlowCmdM env err m =
type FlowCorpus a = ( UniqParameters a type FlowCorpus a = ( UniqParameters a
, InsertDb a , InsertDb a
, ExtractNgramsT a , ExtractNgrams a
, HasText a , HasText a
, ToNode a , ToNode a
, ToJSON a , ToJSON a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment