Verified Commit e7735135 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 477-dev-flow-zip-file-upload-2

parents 25123b89 d362b468
...@@ -644,7 +644,6 @@ library ...@@ -644,7 +644,6 @@ library
, transformers-base ^>= 0.4.6 , transformers-base ^>= 0.4.6
, tree-diff , tree-diff
, tuple ^>= 0.3.0.2 , tuple ^>= 0.3.0.2
, unbounded-delays >= 0.1.1 && < 0.2
, unicode-collation >= 0.1.3.5 , unicode-collation >= 0.1.3.5
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
-- needed for Worker / System.Posix.Signals -- needed for Worker / System.Posix.Signals
......
...@@ -15,7 +15,6 @@ Portability : POSIX ...@@ -15,7 +15,6 @@ Portability : POSIX
{-# LANGUAGE KindSignatures #-} -- for use of Endpoint (name :: Symbol) {-# LANGUAGE KindSignatures #-} -- for use of Endpoint (name :: Symbol)
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PartialTypeSignatures #-} -- to automatically use suggested type hole signatures during compilation {-# LANGUAGE PartialTypeSignatures #-} -- to automatically use suggested type hole signatures during compilation
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.API.GraphQL where module Gargantext.API.GraphQL where
......
...@@ -22,8 +22,7 @@ import Data.Morpheus.Types ...@@ -22,8 +22,7 @@ import Data.Morpheus.Types
, ResolverM , ResolverM
, QUERY , QUERY
) )
import Data.Text (pack, unpack) import Data.Text (pack)
import Data.Text qualified as Text
import Data.Time.Format.ISO8601 (iso8601Show) import Data.Time.Format.ISO8601 (iso8601Show)
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser ) import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
import Gargantext.API.Auth.PolicyCheck ( nodeWriteChecks, AccessPolicyManager ) import Gargantext.API.Auth.PolicyCheck ( nodeWriteChecks, AccessPolicyManager )
...@@ -97,7 +96,7 @@ data ContextsForNgramsArgs ...@@ -97,7 +96,7 @@ data ContextsForNgramsArgs
= ContextsForNgramsArgs = ContextsForNgramsArgs
{ corpus_id :: Int { corpus_id :: Int
, ngrams_terms :: [Text] , ngrams_terms :: [Text]
, and_logic :: Text , and_logic :: Bool
} deriving (Generic, GQLType) } deriving (Generic, GQLType)
data NodeContextCategoryMArgs = NodeContextCategoryMArgs data NodeContextCategoryMArgs = NodeContextCategoryMArgs
...@@ -153,9 +152,10 @@ dbNodeContext context_id node_id = do ...@@ -153,9 +152,10 @@ dbNodeContext context_id node_id = do
-- | Returns list of `ContextGQL` for given ngrams in given corpus id. -- | Returns list of `ContextGQL` for given ngrams in given corpus id.
dbContextForNgrams dbContextForNgrams
:: (IsDBEnvExtra env) :: (IsDBEnvExtra env)
=> Int -> [Text] -> Text -> GqlM e env [ContextGQL] => Int -> [Text] -> Bool -> GqlM e env [ContextGQL]
dbContextForNgrams node_id ngrams_terms and_logic = do dbContextForNgrams node_id ngrams_terms and_logic = do
contextsForNgramsTerms <- lift $ runDBQuery $ getContextsForNgramsTerms (UnsafeMkNodeId node_id) ngrams_terms ( readMaybe $ unpack $ Text.toTitle and_logic ) contextsForNgramsTerms <- lift $ runDBQuery $
getContextsForNgramsTerms (UnsafeMkNodeId node_id) ngrams_terms and_logic
--lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms --lift $ printDebug "[dbContextForNgrams] contextsForNgramsTerms" contextsForNgramsTerms
pure $ toContextGQL <$> contextsForNgramsTerms pure $ toContextGQL <$> contextsForNgramsTerms
......
...@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams } ...@@ -116,11 +116,11 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance Hashable ExtractedNgrams instance Hashable ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity. -- | A typeclass that represents extracting ngrams from an entity.
class ExtractNgrams h where class Monad m => ExtractNgrams m h where
extractNgrams :: NLPServerConfig extractNgrams :: NLPServerConfig
-> TermType Lang -> TermType Lang
-> h -> h
-> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) -> m (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
------------------------------------------------------------------------ ------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
......
...@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1. ...@@ -11,9 +11,19 @@ Multi-terms are ngrams where n > 1.
-} -}
module Gargantext.Core.Text.Terms.Multi (multiterms, Terms(..), tokenTag2terms, multiterms_rake, tokenTagsWith, tokenTags, cleanTextForNLP) module Gargantext.Core.Text.Terms.Multi (
multiterms
, Terms(..)
, MultitermsExtractionException(..)
, tokenTag2terms
, multiterms_rake
, tokenTagsWith
, tokenTags
, cleanTextForNLP
)
where where
import Control.Exception.Safe qualified as Safe
import Data.Attoparsec.Text as DAT (space, notChar, string ) import Data.Attoparsec.Text as DAT (space, notChar, string )
import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo(..)) import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo(..))
import Gargantext.Core.Text.Terms.Multi.Lang.En qualified as En import Gargantext.Core.Text.Terms.Multi.Lang.En qualified as En
...@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T ...@@ -25,14 +35,23 @@ import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(T
import Gargantext.Core.Utils (groupWithCounts) import Gargantext.Core.Utils (groupWithCounts)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.SpacyNLP qualified as SpacyNLP import Gargantext.Utils.SpacyNLP qualified as SpacyNLP
import Network.HTTP.Client
import Replace.Attoparsec.Text as RAT ( streamEdit ) import Replace.Attoparsec.Text as RAT ( streamEdit )
------------------------------------------------------------------- -------------------------------------------------------------------
type NLP_API = Lang -> Text -> IO PosSentences type NLP_API = Lang -> Text -> IO PosSentences
data MultitermsExtractionException
= MEE_nlp_server_http_exception !NLPServerConfig !HttpException
deriving Show
instance Exception MultitermsExtractionException
------------------------------------------------------------------- -------------------------------------------------------------------
-- | Extracts the terms from the input 'txt'. Throws a
-- 'MultitermExtractionException' in case we fail.
multiterms :: NLPServerConfig -> Lang -> Text -> IO [TermsWithCount] multiterms :: NLPServerConfig -> Lang -> Text -> IO [TermsWithCount]
multiterms nsc l txt = do multiterms nsc l txt = handle (\ex -> Safe.throwIO $ MEE_nlp_server_http_exception nsc ex) $ do
let txt' = cleanTextForNLP txt let txt' = cleanTextForNLP txt
if txt' == "" if txt' == ""
then do then do
......
...@@ -54,11 +54,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -54,11 +54,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
) )
where where
import Async.Worker qualified as W
import Conduit import Conduit
import Control.Concurrent.Timeout qualified as Timeout
import Control.Exception.Safe qualified as CES import Control.Exception.Safe qualified as CES
import Control.Lens ( to, view ) import Control.Lens ( to, view )
import Control.Exception.Safe (catch, MonadCatch)
import Data.Conduit qualified as C import Data.Conduit qualified as C
import Data.Conduit.Internal (zipSources) import Data.Conduit.Internal (zipSources)
import Data.Conduit.List qualified as CL import Data.Conduit.List qualified as CL
...@@ -118,6 +117,7 @@ import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) ) ...@@ -118,6 +117,7 @@ import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Imports for upgrade function -- Imports for upgrade function
import Gargantext.Database.Query.Tree.Error ( HasTreeError ) import Gargantext.Database.Query.Tree.Error ( HasTreeError )
import Gargantext.Core.Text.Terms.Multi (MultitermsExtractionException)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -258,9 +258,11 @@ flowCorpus :: ( IsDBCmd env err m ...@@ -258,9 +258,11 @@ flowCorpus :: ( IsDBCmd env err m
, HasTreeError err , HasTreeError err
, HasValidationError err , HasValidationError err
, FlowCorpus a , FlowCorpus a
, ExtractNgrams m a
, MonadJobStatus m , MonadJobStatus m
, CES.MonadCatch m , MonadCatch m
, HasCentralExchangeNotification env, Show a ) , HasCentralExchangeNotification env
)
=> MkCorpusUser => MkCorpusUser
-> TermType Lang -> TermType Lang
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
...@@ -278,11 +280,11 @@ flow :: forall env err m a c. ...@@ -278,11 +280,11 @@ flow :: forall env err m a c.
, HasTreeError err , HasTreeError err
, HasValidationError err , HasValidationError err
, FlowCorpus a , FlowCorpus a
, ExtractNgrams m a
, MkCorpus c , MkCorpus c
, MonadJobStatus m , MonadJobStatus m
, HasCentralExchangeNotification env , HasCentralExchangeNotification env
, CES.MonadCatch m , MonadCatch m
, Show a
) )
=> Maybe c => Maybe c
-> MkCorpusUser -> MkCorpusUser
...@@ -320,10 +322,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m ...@@ -320,10 +322,10 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
, HasNodeError err , HasNodeError err
, HasNLPServer env , HasNLPServer env
, FlowCorpus document , FlowCorpus document
, ExtractNgrams m document
, MkCorpus corpus , MkCorpus corpus
, MonadLogger m , MonadLogger m
, CES.MonadCatch m , MonadCatch m
, Show document
) )
=> Maybe corpus => Maybe corpus
-> TermType Lang -> TermType Lang
...@@ -475,7 +477,7 @@ data InsertDocError ...@@ -475,7 +477,7 @@ data InsertDocError
extractNgramsFromDocument :: ( UniqParameters doc extractNgramsFromDocument :: ( UniqParameters doc
, HasText doc , HasText doc
, ExtractNgrams doc , ExtractNgrams m doc
, IsDBCmd err env m , IsDBCmd err env m
, MonadLogger m , MonadLogger m
, CES.MonadCatch m , CES.MonadCatch m
...@@ -491,13 +493,9 @@ extractNgramsFromDocument nlpServer lang doc = ...@@ -491,13 +493,9 @@ extractNgramsFromDocument nlpServer lang doc =
-- will still be added to the corpus and we can try to regen the ngrams at a later stage. -- will still be added to the corpus and we can try to regen the ngrams at a later stage.
UncommittedNgrams . Map.singleton docId <$> UncommittedNgrams . Map.singleton docId <$>
(documentIdWithNgrams (extractNgrams nlpServer $ withLang lang [doc]) (Indexed docId doc) (documentIdWithNgrams (extractNgrams nlpServer $ withLang lang [doc]) (Indexed docId doc)
`CES.catches` `catch` \(e :: MultitermsExtractionException) -> do
[ CES.Handler $ \(e :: Timeout.Timeout) -> CES.throw e
, CES.Handler $ \(e :: W.KillWorkerSafely) -> CES.throw e
, CES.Handler $ \(e :: CES.SomeException) -> do
$(logLocM) ERROR $ T.pack $ "Document with hash " <> show docId <> " failed ngrams extraction due to an exception: " <> displayException e $(logLocM) ERROR $ T.pack $ "Document with hash " <> show docId <> " failed ngrams extraction due to an exception: " <> displayException e
pure $ DocumentIdWithNgrams (Indexed docId doc) mempty pure $ DocumentIdWithNgrams (Indexed docId doc) mempty
]
) )
where where
docId = DocumentHashId $ newUniqIdHash doc docId = DocumentHashId $ newUniqIdHash doc
...@@ -525,7 +523,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do ...@@ -525,7 +523,7 @@ commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
extractNgramsFromDocuments :: forall doc env err m. extractNgramsFromDocuments :: forall doc env err m.
( HasText doc ( HasText doc
, UniqParameters doc , UniqParameters doc
, ExtractNgrams doc , ExtractNgrams m doc
, IsDBCmd env err m , IsDBCmd env err m
, MonadLogger m , MonadLogger m
, CES.MonadCatch m , CES.MonadCatch m
...@@ -553,7 +551,7 @@ commitNgramsForDocuments ng nodes = ...@@ -553,7 +551,7 @@ commitNgramsForDocuments ng nodes =
insertMasterDocs :: ( HasNodeError err insertMasterDocs :: ( HasNodeError err
, UniqParameters doc , UniqParameters doc
, FlowCorpus doc , FlowCorpus doc
, MkCorpus c, Show doc , MkCorpus c
) )
=> GargConfig => GargConfig
-> UncommittedNgrams doc -> UncommittedNgrams doc
......
...@@ -13,6 +13,7 @@ Portability : POSIX ...@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module Gargantext.Database.Action.Flow.Extract module Gargantext.Database.Action.Flow.Extract
...@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight) ...@@ -30,7 +31,6 @@ import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source )
import Gargantext.Database.Admin.Types.Node ( Node ) import Gargantext.Database.Admin.Types.Node ( Node )
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag) import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag)
import Gargantext.Database.Schema.Ngrams ( text2ngrams ) import Gargantext.Database.Schema.Ngrams ( text2ngrams )
import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Database.Schema.Node (NodePoly(..))
...@@ -39,7 +39,7 @@ import Gargantext.Prelude ...@@ -39,7 +39,7 @@ import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance ExtractNgrams HyperdataContact where instance Monad m => ExtractNgrams m HyperdataContact where
extractNgrams _ncs _l = pure . HashMap.mapKeys (cleanExtractedNgrams 255) . extract extractNgrams _ncs _l = pure . HashMap.mapKeys (cleanExtractedNgrams 255) . extract
where where
extract :: HyperdataContact extract :: HyperdataContact
...@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where ...@@ -55,15 +55,15 @@ instance ExtractNgrams HyperdataContact where
-- | Main ngrams extraction functionality. -- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood. -- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ". -- For Sources, Institutes, Authors, this uses simple split on " ".
instance ExtractNgrams HyperdataDocument where instance (Monad m, MonadBase IO m) => ExtractNgrams m HyperdataDocument where
extractNgrams :: NLPServerConfig extractNgrams :: NLPServerConfig
-> TermType Lang -> TermType Lang
-> HyperdataDocument -> HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) -> m (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extractNgrams ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd extractNgrams ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
where where
extractNgramsT' :: HyperdataDocument extractNgramsT' :: HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) -> m (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
extractNgramsT' doc = do extractNgramsT' doc = do
let source = text2ngrams let source = text2ngrams
$ maybe "Nothing" identity $ maybe "Nothing" identity
...@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where ...@@ -87,7 +87,7 @@ instance ExtractNgrams HyperdataDocument where
<> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] <> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
<> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ] <> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgrams a, HasText a) => ExtractNgrams (Node a) instance (ExtractNgrams m a, HasText a) => ExtractNgrams m (Node a)
where where
extractNgrams ncs l (Node { _node_hyperdata = h }) = extractNgrams ncs l h extractNgrams ncs l (Node { _node_hyperdata = h }) = extractNgrams ncs l h
......
...@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory ) ...@@ -25,7 +25,6 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text ( HasText ) import Gargantext.Core.Text ( HasText )
import Gargantext.API.Admin.Orchestrator.Types qualified as API import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms ( ExtractNgrams )
import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight) import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
...@@ -52,7 +51,6 @@ type FlowCmdM env err m = ...@@ -52,7 +51,6 @@ type FlowCmdM env err m =
type FlowCorpus a = ( UniqParameters a type FlowCorpus a = ( UniqParameters a
, InsertDb a , InsertDb a
, ExtractNgrams a
, HasText a , HasText a
, ToNode a , ToNode a
, ToJSON a , ToJSON a
......
...@@ -151,9 +151,9 @@ data ContextForNgramsTerms = ...@@ -151,9 +151,9 @@ data ContextForNgramsTerms =
getContextsForNgramsTerms :: HasNodeError err getContextsForNgramsTerms :: HasNodeError err
=> NodeId => NodeId
-> [Text] -> [Text]
-> Maybe Bool -> Bool
-> DBQuery err x [ContextForNgramsTerms] -> DBQuery err x [ContextForNgramsTerms]
getContextsForNgramsTerms cId ngramsTerms (Just True) = do getContextsForNgramsTerms cId ngramsTerms True = do
let terms_length = length ngramsTerms let terms_length = length ngramsTerms
res <- mkPGQuery query (cId, PGS.In ngramsTerms, terms_length) res <- mkPGQuery query (cId, PGS.In ngramsTerms, terms_length)
pure $ (\( _cfnt_nodeId pure $ (\( _cfnt_nodeId
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment