Commit 3da16377 authored by Alexandre Delanoë

[TEXTFLOW] enriched ngrams connected

parent 5f6b2708
......@@ -68,12 +68,12 @@ instance HasDBid Lang where
fromDBid _ = panic "HasDBid lang, not implemented"
------------------------------------------------------------------------
-- NOTE(review): each changed line in this hunk appears twice, first with the
-- spelling @PostTagAlgo@ and then with @PosTagAlgo@; this looks like the
-- removed/added pair of a rename whose diff markers were lost — confirm.
-- | Tagging algorithm identifier; 'CoreNLP' is the only constructor shown here.
data PostTagAlgo = CoreNLP
data PosTagAlgo = CoreNLP
deriving (Show, Read, Eq, Ord, Generic)
instance Hashable PostTagAlgo
instance Hashable PosTagAlgo
-- | Database id mapping: 'CoreNLP' is stored as 1.
instance HasDBid PostTagAlgo where
instance HasDBid PosTagAlgo where
toDBid CoreNLP = 1
fromDBid 1 = CoreNLP
-- Any id other than 1 aborts through 'panic' with the message below.
fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"
......
......@@ -126,7 +126,14 @@ class ExtractNgramsT h
-> h
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
------------------------------------------------------------------------
-- | Build the 'NgramsPostag' record for one extracted 'Terms' value.
--
-- The language, tagging algorithm and POS tag are stored exactly as given.
-- The form is the first 'Terms' field joined with single spaces; the lemma is
-- the second field (a set, flattened with 'Set.toList') joined the same way.
-- Both texts are converted with 'text2ngrams'.
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms lang algo pos (Terms labelWords stemSet) =
  NgramsPostag lang algo pos surfaceForm lemma
  where
    -- Shared by both fields: join the words with one space, then wrap.
    spaced ws   = text2ngrams (Text.intercalate " " ws)
    surfaceForm = spaced labelWords
    lemma       = spaced (Set.toList stemSet)
------------------------------------------------------------------------
cleanNgrams :: Int -> Ngrams -> Ngrams
cleanNgrams s ng
| Text.length (ng ^. ngramsTerms) < s = ng
......
......@@ -63,7 +63,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map
import Gargantext.Core (Lang(..))
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types
......@@ -73,7 +73,7 @@ import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Types (Terms(..), POS(NP))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
......@@ -409,15 +409,14 @@ instance ExtractNgramsT HyperdataDocument
$ maybe ["Nothing"] (splitOn ", ")
$ _hd_authors doc
terms' <- map text2ngrams
<$> map (intercalate " " . _terms_label)
terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP)
<$> concat
<$> liftBase (extractTerms lang' $ hasText doc)
pure $ HashMap.fromList $ [(SimpleNgrams source, Map.singleton Sources 1)]
<> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
<> [(SimpleNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
<> [(EnrichedNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where
......
......@@ -33,7 +33,7 @@ import qualified Database.PostgreSQL.Simple as PGS
data NgramsPostag = NgramsPostag { _np_lang :: Lang
, _np_algo :: PostTagAlgo
, _np_algo :: PosTagAlgo
, _np_postag :: POS
, _np_form :: Ngrams
, _np_lem :: Ngrams
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment