[TEXTFLOW] enriched ngrams connected

......@@ -68,12 +68,12 @@ instance HasDBid Lang where
fromDBid _ = panic "HasDBid lang, not implemented"
-- NOTE(review): every declaration head below appears twice, once spelled
-- `PostTagAlgo` and once `PosTagAlgo`. This looks like the before/after sides
-- of a rename shown without diff markers; only one spelling can exist in a
-- real module (both would declare the constructor 'CoreNLP') — confirm which
-- side is kept before relying on either name.

-- | Algorithm selector with a single constructor, 'CoreNLP'.
-- Presumably identifies the part-of-speech tagger used (name-based guess —
-- TODO confirm).
data PostTagAlgo = CoreNLP
data PosTagAlgo = CoreNLP
deriving (Show, Read, Eq, Ord, Generic)
-- 'Hashable' instance with no explicit methods; presumably relies on the
-- class's default implementation via the derived 'Generic' — TODO confirm.
instance Hashable PostTagAlgo
instance Hashable PosTagAlgo
-- | Database identifier mapping: 'CoreNLP' is written as 1 and read back
-- from 1.
instance HasDBid PostTagAlgo where
instance HasDBid PosTagAlgo where
toDBid CoreNLP = 1
fromDBid 1 = CoreNLP
-- Any identifier other than 1 is not decoded: it calls 'panic' with the
-- message below.
fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"
......@@ -126,7 +126,14 @@ class ExtractNgramsT h
-> h
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
-- | Package extracted 'Terms' as an 'NgramsPostag'.
--
-- The language, tagging algorithm and part-of-speech tag are stored as given.
-- Two 'Ngrams' values are derived from the 'Terms' payload:
--
-- * the form: the first 'Terms' field, joined with single spaces;
-- * the lemma: the second 'Terms' field (a set), flattened with 'Set.toList'
--   and joined with single spaces, so its word order is the order
--   'Set.toList' returns, not the order of the original text.
--
-- Both joined texts are turned into 'Ngrams' with 'text2ngrams'.
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms ng1 ng2) =
  NgramsPostag l pa po form lem
  where
    -- `form` and `lem` are local to this equation; they must live in a
    -- `where` clause to be in scope for the constructor application above.
    form = text2ngrams $ Text.intercalate " " ng1
    lem  = text2ngrams $ Text.intercalate " " $ Set.toList ng2
cleanNgrams :: Int -> Ngrams -> Ngrams
cleanNgrams s ng
| Text.length (ng ^. ngramsTerms) < s = ng
......@@ -63,7 +63,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map
import Gargantext.Core (Lang(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types
......@@ -73,7 +73,7 @@ import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Types (Terms(..), POS(NP))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
......@@ -409,15 +409,14 @@ instance ExtractNgramsT HyperdataDocument
$ maybe ["Nothing"] (splitOn ", ")
$ _hd_authors doc
terms' <- map text2ngrams
<$> map (intercalate " " . _terms_label)
terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP)
<$> concat
<$> liftBase (extractTerms lang' $ hasText doc)
pure $ HashMap.fromList $ [(SimpleNgrams source, Map.singleton Sources 1)]
<> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
<> [(SimpleNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
<> [(EnrichedNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
......@@ -33,7 +33,7 @@ import qualified Database.PostgreSQL.Simple as PGS
data NgramsPostag = NgramsPostag { _np_lang :: Lang
, _np_algo :: PostTagAlgo
, _np_algo :: PosTagAlgo
, _np_postag :: POS
, _np_form :: Ngrams
, _np_lem :: Ngrams
