Commit 3da16377 authored by Alexandre Delanoë

[TEXTFLOW] enriched ngrams connected

parent 5f6b2708
Pipeline #1334 failed with stage
...@@ -68,12 +68,12 @@ instance HasDBid Lang where ...@@ -68,12 +68,12 @@ instance HasDBid Lang where
fromDBid _ = panic "HasDBid lang, not implemented" fromDBid _ = panic "HasDBid lang, not implemented"
------------------------------------------------------------------------ ------------------------------------------------------------------------
data PostTagAlgo = CoreNLP data PosTagAlgo = CoreNLP
deriving (Show, Read, Eq, Ord, Generic) deriving (Show, Read, Eq, Ord, Generic)
instance Hashable PostTagAlgo instance Hashable PosTagAlgo
instance HasDBid PostTagAlgo where instance HasDBid PosTagAlgo where
toDBid CoreNLP = 1 toDBid CoreNLP = 1
fromDBid 1 = CoreNLP fromDBid 1 = CoreNLP
fromDBid _ = panic "HasDBid posTagAlgo : Not implemented" fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"
......
...@@ -126,7 +126,14 @@ class ExtractNgramsT h ...@@ -126,7 +126,14 @@ class ExtractNgramsT h
-> h -> h
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Wrap a 'Terms' value into an 'NgramsPostag', tagging it with the given
-- language, POS-tagging algorithm and part-of-speech.
--
-- The first component of 'Terms' (a list of words) is joined with single
-- spaces and converted with 'text2ngrams' to give the form; the second
-- component (a set of words) is flattened with 'Set.toList', joined the same
-- way and converted to give the lem.
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms lang algo pos (Terms formWords lemWords) =
  NgramsPostag lang algo pos (joinWords formWords) (joinWords (Set.toList lemWords))
  where
    -- Shared conversion: space-join the words, then build the Ngrams.
    joinWords ws = text2ngrams (Text.intercalate " " ws)
------------------------------------------------------------------------
cleanNgrams :: Int -> Ngrams -> Ngrams cleanNgrams :: Int -> Ngrams -> Ngrams
cleanNgrams s ng cleanNgrams s ng
| Text.length (ng ^. ngramsTerms) < s = ng | Text.length (ng ^. ngramsTerms) < s = ng
......
...@@ -63,7 +63,7 @@ import qualified Data.HashMap.Strict as HashMap ...@@ -63,7 +63,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map import qualified Data.Map as Map
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName) import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile) import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
...@@ -73,7 +73,7 @@ import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) ...@@ -73,7 +73,7 @@ import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..), POS(NP))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
...@@ -409,15 +409,14 @@ instance ExtractNgramsT HyperdataDocument ...@@ -409,15 +409,14 @@ instance ExtractNgramsT HyperdataDocument
$ maybe ["Nothing"] (splitOn ", ") $ maybe ["Nothing"] (splitOn ", ")
$ _hd_authors doc $ _hd_authors doc
terms' <- map text2ngrams terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP)
<$> map (intercalate " " . _terms_label)
<$> concat <$> concat
<$> liftBase (extractTerms lang' $ hasText doc) <$> liftBase (extractTerms lang' $ hasText doc)
pure $ HashMap.fromList $ [(SimpleNgrams source, Map.singleton Sources 1)] pure $ HashMap.fromList $ [(SimpleNgrams source, Map.singleton Sources 1)]
<> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ] <> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ] <> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
<> [(SimpleNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ] <> [(EnrichedNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a) instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where where
......
...@@ -33,7 +33,7 @@ import qualified Database.PostgreSQL.Simple as PGS ...@@ -33,7 +33,7 @@ import qualified Database.PostgreSQL.Simple as PGS
data NgramsPostag = NgramsPostag { _np_lang :: Lang data NgramsPostag = NgramsPostag { _np_lang :: Lang
, _np_algo :: PostTagAlgo , _np_algo :: PosTagAlgo
, _np_postag :: POS , _np_postag :: POS
, _np_form :: Ngrams , _np_form :: Ngrams
, _np_lem :: Ngrams , _np_lem :: Ngrams
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment