Verified Commit 337d2af5 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 471-dev-node-multiterms

parents b0913118 d19839d8
Pipeline #7651 failed with stages
in 62 minutes and 31 seconds
...@@ -19,6 +19,7 @@ module CLI.Import where ...@@ -19,6 +19,7 @@ module CLI.Import where
import CLI.Parsers import CLI.Parsers
import CLI.Types import CLI.Types
import Control.Monad.Catch (MonadCatch)
import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..)) import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
import Gargantext.API.Dev (withDevEnv, runCmdGargDev) import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
import Gargantext.API.Errors.Types ( BackendInternalError ) import Gargantext.API.Errors.Types ( BackendInternalError )
...@@ -42,14 +43,14 @@ importCLI (ImportArgs fun user name settingsPath corpusPath) = do ...@@ -42,14 +43,14 @@ importCLI (ImportArgs fun user name settingsPath corpusPath) = do
let let
tt = Multi EN tt = Multi EN
format = TsvGargV3 format = TsvGargV3
corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, MonadCatch m, JobHandle m ~ DevJobHandle) => m CorpusId
mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text) mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
corpus = flowCorpusFile mkCorpusUser tt format Plain corpusPath Nothing DevJobHandle corpus = flowCorpusFile mkCorpusUser tt format Plain corpusPath Nothing DevJobHandle
corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, MonadCatch m, JobHandle m ~ DevJobHandle) => m CorpusId
corpusTsvHal = flowCorpusFile mkCorpusUser tt TsvHal Plain corpusPath Nothing DevJobHandle corpusTsvHal = flowCorpusFile mkCorpusUser tt TsvHal Plain corpusPath Nothing DevJobHandle
annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, MonadCatch m, JobHandle m ~ DevJobHandle) => m CorpusId
annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
withDevEnv settingsPath $ \env -> do withDevEnv settingsPath $ \env -> do
......
...@@ -702,6 +702,7 @@ executable gargantext ...@@ -702,6 +702,7 @@ executable gargantext
, containers ^>= 0.6.7 , containers ^>= 0.6.7
, cryptohash ^>= 0.11.9 , cryptohash ^>= 0.11.9
, directory ^>= 1.3.7.1 , directory ^>= 1.3.7.1
, exceptions >= 0.9.0 && < 0.11
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
......
...@@ -18,6 +18,7 @@ module Gargantext.API.Node.Contact ...@@ -18,6 +18,7 @@ module Gargantext.API.Node.Contact
where where
import Conduit ( yield ) import Conduit ( yield )
import Control.Monad.Catch (MonadCatch)
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser(AuthenticatedUser) ) import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser(AuthenticatedUser) )
import Gargantext.API.Admin.EnvTypes (Env) import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors.Types ( BackendInternalError ) import Gargantext.API.Errors.Types ( BackendInternalError )
...@@ -57,7 +58,7 @@ apiAsync u nId = Named.ContactAsyncAPI { ...@@ -57,7 +58,7 @@ apiAsync u nId = Named.ContactAsyncAPI {
, _ac_user = u } , _ac_user = u }
} }
addContact :: (FlowCmdM env err m, MonadJobStatus m) addContact :: (FlowCmdM env err m, MonadJobStatus m, MonadCatch m)
=> User => User
-> NodeId -> NodeId
-> AddContactParams -> AddContactParams
......
...@@ -24,6 +24,7 @@ module Gargantext.API.Node.Corpus.New ...@@ -24,6 +24,7 @@ module Gargantext.API.Node.Corpus.New
import Conduit ((.|), yieldMany, mapMC, transPipe) import Conduit ((.|), yieldMany, mapMC, transPipe)
import Control.Exception.Safe (MonadMask) import Control.Exception.Safe (MonadMask)
import Control.Lens ( view, non ) import Control.Lens ( view, non )
import Control.Monad.Catch (MonadCatch)
import Data.Conduit.Internal (zipSources) import Data.Conduit.Internal (zipSources)
import Data.Conduit.List (mapMaybeM) import Data.Conduit.List (mapMaybeM)
import Data.Swagger ( ToSchema(..) ) import Data.Swagger ( ToSchema(..) )
...@@ -56,6 +57,7 @@ import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId) ...@@ -56,6 +57,7 @@ import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
import Gargantext.Database.GargDB qualified as GargDB import Gargantext.Database.GargDB qualified as GargDB
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node (getNodeWith, getOrMkList) import Gargantext.Database.Query.Table.Node (getNodeWith, getOrMkList)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusIds)) import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusIds))
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
...@@ -63,7 +65,6 @@ import Gargantext.Prelude ...@@ -63,7 +65,6 @@ import Gargantext.Prelude
import Gargantext.System.Logging ( logLocM, LogLevel(..) ) import Gargantext.System.Logging ( logLocM, LogLevel(..) )
import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..)) import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..))
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..)) import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
------------------------------------------------------------------------ ------------------------------------------------------------------------
{- {-
...@@ -150,6 +151,7 @@ type AddWithFile = Summary "Add with MultipartData to corpus endpoint" ...@@ -150,6 +151,7 @@ type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
addToCorpusWithQuery :: ( FlowCmdM env err m addToCorpusWithQuery :: ( FlowCmdM env err m
, MonadJobStatus m , MonadJobStatus m
, MonadCatch m
) )
=> User => User
-> CorpusId -> CorpusId
......
...@@ -14,6 +14,7 @@ Portability : POSIX ...@@ -14,6 +14,7 @@ Portability : POSIX
module Gargantext.API.Node.Corpus.Searx where module Gargantext.API.Node.Corpus.Searx where
import Control.Lens (view) import Control.Lens (view)
import Control.Monad.Catch (MonadCatch)
import Data.Aeson qualified as Aeson import Data.Aeson qualified as Aeson
import Data.Text qualified as T import Data.Text qualified as T
import Data.Text qualified as Text import Data.Text qualified as Text
...@@ -40,6 +41,7 @@ import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNot ...@@ -40,6 +41,7 @@ import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNot
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError) import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.System.Logging (MonadLogger)
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..)) import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..))
import Network.HTTP.Client import Network.HTTP.Client
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
...@@ -118,6 +120,8 @@ insertSearxResponse :: ( MonadBase IO m ...@@ -118,6 +120,8 @@ insertSearxResponse :: ( MonadBase IO m
, HasNodeError err , HasNodeError err
, HasTreeError err , HasTreeError err
, HasValidationError err , HasValidationError err
, MonadCatch m
, MonadLogger m
) )
=> User => User
-> CorpusId -> CorpusId
...@@ -155,6 +159,8 @@ triggerSearxSearch :: ( MonadBase IO m ...@@ -155,6 +159,8 @@ triggerSearxSearch :: ( MonadBase IO m
, HasTreeError err , HasTreeError err
, HasValidationError err , HasValidationError err
, MonadJobStatus m , MonadJobStatus m
, MonadCatch m
, MonadLogger m
) )
=> User => User
-> CorpusId -> CorpusId
......
...@@ -16,6 +16,7 @@ Portability : POSIX ...@@ -16,6 +16,7 @@ Portability : POSIX
module Gargantext.API.Node.DocumentUpload where module Gargantext.API.Node.DocumentUpload where
import Control.Lens (view) import Control.Lens (view)
import Control.Monad.Catch (MonadCatch)
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..)) import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..))
import Gargantext.API.Admin.EnvTypes (Env) import Gargantext.API.Admin.EnvTypes (Env)
...@@ -32,16 +33,16 @@ import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv) ...@@ -32,16 +33,16 @@ import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Core.Worker.Jobs.Types (WorkSplit(..)) import Gargantext.Core.Worker.Jobs.Types (WorkSplit(..))
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus) import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types ( FlowCmdM ) import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus ) import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus), ParentId ) import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus), ParentId )
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType') import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Schema.Node (_node_hyperdata) import Gargantext.Database.Schema.Node (_node_hyperdata)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger) import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
...@@ -55,7 +56,7 @@ api nId = Named.DocumentUploadAPI { ...@@ -55,7 +56,7 @@ api nId = Named.DocumentUploadAPI {
, _ud_node_id = nId } , _ud_node_id = nId }
} }
documentUploadAsync :: (FlowCmdM env err m, MonadJobStatus m) documentUploadAsync :: (FlowCmdM env err m, MonadJobStatus m, MonadCatch m)
=> NodeId => NodeId
-> DocumentUpload -> DocumentUpload
-> JobHandle m -> JobHandle m
...@@ -66,7 +67,7 @@ documentUploadAsync nId doc jobHandle = do ...@@ -66,7 +67,7 @@ documentUploadAsync nId doc jobHandle = do
-- printDebug "documentUploadAsync" docIds -- printDebug "documentUploadAsync" docIds
markComplete jobHandle markComplete jobHandle
documentUpload :: (FlowCmdM env err m) documentUpload :: (FlowCmdM env err m, MonadCatch m)
=> NodeId => NodeId
-> DocumentUpload -> DocumentUpload
-> m [DocId] -> m [DocId]
...@@ -110,6 +111,7 @@ remoteImportDocuments :: ( HasNodeError err ...@@ -110,6 +111,7 @@ remoteImportDocuments :: ( HasNodeError err
, HasNodeStoryEnv env err , HasNodeStoryEnv env err
, IsDBCmd env err m , IsDBCmd env err m
, MonadLogger m , MonadLogger m
, MonadCatch m
, MonadIO m) , MonadIO m)
=> AuthenticatedUser => AuthenticatedUser
-> ParentId -> ParentId
......
...@@ -17,6 +17,8 @@ module Gargantext.API.Node.DocumentsFromWriteNodes ...@@ -17,6 +17,8 @@ module Gargantext.API.Node.DocumentsFromWriteNodes
where where
import Conduit ( yieldMany ) import Conduit ( yieldMany )
import Control.Lens (view)
import Control.Monad.Catch (MonadCatch)
import Data.List qualified as List import Data.List qualified as List
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id ) import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id )
...@@ -39,15 +41,14 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM) ...@@ -39,15 +41,14 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents ) import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents )
import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) ) import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) )
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList) import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date) import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Database.Prelude
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.System.Logging (logLocM, LogLevel(..)) import Gargantext.System.Logging (logLocM, LogLevel(..))
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..))
import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..)) import Gargantext.Utils.Jobs.Error (HumanFriendlyErrorText(..))
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..))
import Servant.Server.Generic (AsServerT) import Servant.Server.Generic (AsServerT)
import Control.Lens (view)
api :: AuthenticatedUser api :: AuthenticatedUser
-- ^ The logged-in user -- ^ The logged-in user
...@@ -63,6 +64,7 @@ api authenticatedUser nId = ...@@ -63,6 +64,7 @@ api authenticatedUser nId =
documentsFromWriteNodes :: ( FlowCmdM env err m documentsFromWriteNodes :: ( FlowCmdM env err m
, MonadJobStatus m , MonadJobStatus m
, MonadCatch m
) )
=> AuthenticatedUser => AuthenticatedUser
-- ^ The logged-in user -- ^ The logged-in user
......
...@@ -116,13 +116,12 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams } ...@@ -116,13 +116,12 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance Hashable ExtractedNgrams instance Hashable ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity. -- | A typeclass that represents extracting ngrams from an entity.
class ExtractNgramsT h class ExtractNgrams h where
where extractNgrams :: NLPServerConfig
extractNgramsT :: HasText h -> TermType Lang
=> NLPServerConfig -> h
-> TermType Lang -> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
-> h
-> DBCmd err (HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
------------------------------------------------------------------------ ------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms { .. }) = enrichedTerms l pa po (Terms { .. }) =
......
This diff is collapsed.
...@@ -25,7 +25,7 @@ import Gargantext.Core (Lang, NLPServerConfig(server)) ...@@ -25,7 +25,7 @@ import Gargantext.Core (Lang, NLPServerConfig(server))
import Gargantext.Core.Text (HasText(..)) import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Text.Corpus.Parsers (splitOn) import Gargantext.Core.Text.Corpus.Parsers (splitOn)
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang) import Gargantext.Core.Text.Terms (ExtractNgrams(..), ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractTerms, tt_lang)
import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight) import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source )
...@@ -39,59 +39,57 @@ import Gargantext.Prelude ...@@ -39,59 +39,57 @@ import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact instance ExtractNgrams HyperdataContact where
where extractNgrams _ncs _l = pure . HashMap.mapKeys (cleanExtractedNgrams 255) . extract
extractNgramsT _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc where
where extract :: HyperdataContact
extract :: TermType Lang -> HyperdataContact -> HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) extract hc' =
extract _l hc' = do let authors = map text2ngrams
let authors = map text2ngrams $ maybe ["Nothing"] (\a -> [a])
$ maybe ["Nothing"] (\a -> [a]) $ view (hc_who . _Just . cw_lastName) hc'
$ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] in HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
-- | Main ngrams extraction functionality. -- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood. -- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ". -- For Sources, Institutes, Authors, this uses simple split on " ".
instance ExtractNgramsT HyperdataDocument instance ExtractNgrams HyperdataDocument where
where extractNgrams :: NLPServerConfig
extractNgramsT :: NLPServerConfig -> TermType Lang
-> TermType Lang -> HyperdataDocument
-> HyperdataDocument -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) extractNgrams ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd where
where extractNgramsT' :: HyperdataDocument
extractNgramsT' :: HyperdataDocument -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount)) extractNgramsT' doc = do
extractNgramsT' doc = do let source = text2ngrams
let source = text2ngrams $ maybe "Nothing" identity
$ maybe "Nothing" identity $ doc ^. hd_source
$ doc ^. hd_source
institutes = map text2ngrams
institutes = map text2ngrams $ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd)) $ doc ^. hd_institutes
$ doc ^. hd_institutes
authors = map text2ngrams
authors = map text2ngrams $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd)) $ doc ^. hd_authors
$ doc ^. hd_authors
termsWithCounts' :: [(NgramsPostag, TermsCount)] <-
termsWithCounts' :: [(NgramsPostag, TermsCount)] <- map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$>
map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$> liftBase (extractTerms ncs lang $ hasText doc)
liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
pure $ HashMap.fromList $ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ]
$ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ] <> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ] <> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
<> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] <> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
<> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgrams a, HasText a) => ExtractNgrams (Node a)
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where where
extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h extractNgrams ncs l (Node { _node_hyperdata = h }) = extractNgrams ncs l h
instance HasText a => HasText (Node a) instance HasText a => HasText (Node a)
......
...@@ -25,7 +25,7 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory ) ...@@ -25,7 +25,7 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text ( HasText ) import Gargantext.Core.Text ( HasText )
import Gargantext.API.Admin.Orchestrator.Types qualified as API import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms ( ExtractNgramsT ) import Gargantext.Core.Text.Terms ( ExtractNgrams )
import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight) import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
...@@ -52,7 +52,7 @@ type FlowCmdM env err m = ...@@ -52,7 +52,7 @@ type FlowCmdM env err m =
type FlowCorpus a = ( UniqParameters a type FlowCorpus a = ( UniqParameters a
, InsertDb a , InsertDb a
, ExtractNgramsT a , ExtractNgrams a
, HasText a , HasText a
, ToNode a , ToNode a
, ToJSON a , ToJSON a
...@@ -66,9 +66,9 @@ type FlowInsertDB a = ( AddUniqId a ...@@ -66,9 +66,9 @@ type FlowInsertDB a = ( AddUniqId a
data DocumentIdWithNgrams a b = data DocumentIdWithNgrams ix a b =
DocumentIdWithNgrams DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a { documentWithId :: Indexed ix a
, documentNgrams :: HashMap b (Map NgramsType TermsWeight, TermsCount) , documentNgrams :: HashMap b (Map NgramsType TermsWeight, TermsCount)
} deriving (Show) } deriving (Show)
......
...@@ -13,9 +13,10 @@ module Gargantext.Database.Action.Flow.Utils ...@@ -13,9 +13,10 @@ module Gargantext.Database.Action.Flow.Utils
( docNgrams ( docNgrams
, docNgrams' , docNgrams'
, documentIdWithNgrams , documentIdWithNgrams
, mapDocumentIdWithNgrams
, insertDocNgrams , insertDocNgrams
, insertDocs , insertDocs
, mapNodeIdNgrams , mkNodeIdNgramsMap
, ngramsByDoc ) , ngramsByDoc )
where where
...@@ -39,7 +40,6 @@ import Gargantext.Database.Prelude ...@@ -39,7 +40,6 @@ import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly(..), insertContextNodeNgrams ) import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly(..), insertContextNodeNgrams )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add) import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId) import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Schema.Context (context_oid_hyperdata, context_oid_id) import Gargantext.Database.Schema.Context (context_oid_hyperdata, context_oid_id)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..), text2ngrams) import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..), text2ngrams)
import Gargantext.Database.Types ( Indexed(..), index ) import Gargantext.Database.Types ( Indexed(..), index )
...@@ -94,34 +94,40 @@ docNgrams' lang ts txt = ...@@ -94,34 +94,40 @@ docNgrams' lang ts txt =
termsInText lang (buildPatternsWith lang ts) txt termsInText lang (buildPatternsWith lang ts) txt
documentIdWithNgrams :: HasNodeError err documentIdWithNgrams :: Monad m
=> ( a => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-> DBCmd err (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) ) -> Indexed ix a
-> [Indexed NodeId a] -> m (DocumentIdWithNgrams ix a b)
-> DBCmd err [DocumentIdWithNgrams a b] documentIdWithNgrams f = toDocumentIdWithNgrams
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where where
toDocumentIdWithNgrams d = do toDocumentIdWithNgrams d = do
e <- f $ _unIndex d e <- f $ _unIndex d
pure $ DocumentIdWithNgrams { documentWithId = d pure $ DocumentIdWithNgrams { documentWithId = d
, documentNgrams = e } , documentNgrams = e }
mapDocumentIdWithNgrams :: Monad m
=> ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-> [Indexed ix a]
-> m [DocumentIdWithNgrams ix a b]
mapDocumentIdWithNgrams f = mapM (documentIdWithNgrams f)
-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b) -- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list.
=> [DocumentIdWithNgrams a b] -- TODO check optimization
mkNodeIdNgramsMap :: forall ix a b. (Ord b, Hashable b, Ord ix)
=> [DocumentIdWithNgrams ix a b]
-> HashMap.HashMap b -> HashMap.HashMap b
(Map NgramsType (Map NgramsType
(Map NodeId (TermsWeight, TermsCount)) (Map ix (TermsWeight, TermsCount))
) )
mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
where where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the -- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount' -- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a -- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types. -- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b f :: DocumentIdWithNgrams ix a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) -> HashMap.HashMap b (Map NgramsType (Map ix (TermsWeight, TermsCount)))
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap) $ documentNgrams d f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap) $ documentNgrams d
where where
nId = _index $ documentWithId d nId = _index $ documentWithId d
......
...@@ -43,7 +43,7 @@ instance NFData HyperdataContact where ...@@ -43,7 +43,7 @@ instance NFData HyperdataContact where
instance HasText HyperdataContact instance HasText HyperdataContact
where where
hasText = undefined hasText = mempty
defaultHyperdataContact :: HyperdataContact defaultHyperdataContact :: HyperdataContact
defaultHyperdataContact = defaultHyperdataContact =
......
...@@ -292,7 +292,7 @@ instance ToSchema NodeId ...@@ -292,7 +292,7 @@ instance ToSchema NodeId
-- | An identifier for a 'Context' in gargantext. -- | An identifier for a 'Context' in gargantext.
newtype ContextId = UnsafeMkContextId { _ContextId :: Int } newtype ContextId = UnsafeMkContextId { _ContextId :: Int }
deriving stock (Show, Eq, Ord, Generic) deriving stock (Show, Eq, Ord, Generic)
deriving newtype (Csv.ToField, ToJSONKey, FromJSONKey, ToJSON, FromJSON, ToField, ToSchema) deriving newtype (Csv.ToField, ToJSONKey, FromJSONKey, ToJSON, FromJSON, ToField, ToSchema, Hashable)
deriving anyclass ToExpr deriving anyclass ToExpr
deriving FromField via NodeId deriving FromField via NodeId
......
...@@ -72,7 +72,7 @@ import Gargantext.Database.Prelude ...@@ -72,7 +72,7 @@ import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (hash, toLower) import Gargantext.Prelude hiding (hash, toLower)
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash, Hash)
{-| To Print result query {-| To Print result query
import Data.ByteString.Internal (ByteString) import Data.ByteString.Internal (ByteString)
...@@ -221,9 +221,9 @@ instance UniqParameters HyperdataContact ...@@ -221,9 +221,9 @@ instance UniqParameters HyperdataContact
where where
uniqParameters _ = "" uniqParameters _ = ""
instance UniqParameters (Node a) instance UniqParameters a => UniqParameters (Node a)
where where
uniqParameters _ = undefined uniqParameters = uniqParameters . _node_hyperdata
filterText :: Text -> Text filterText :: Text -> Text
...@@ -232,9 +232,13 @@ filterText = DT.toLower . DT.filter isAlphaNum ...@@ -232,9 +232,13 @@ filterText = DT.toLower . DT.filter isAlphaNum
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a) instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h addUniqId node = node { _node_hash_id = Just $ newUniqIdHash node }
where where
newHash = "\\x" <> hash (uniqParameters h)
-- | Returns a new unique ID computed by hashing the uniq parameters of the input
-- and prefixing everything with '\\x'.
newUniqIdHash :: UniqParameters a => a -> Hash
newUniqIdHash a = "\\x" <> hash (uniqParameters a)
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
......
...@@ -11,6 +11,7 @@ Portability : POSIX ...@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-} {-# OPTIONS_GHC -Wno-orphans #-}
{-# LANGUAGE TypeApplications #-}
module Test.Database.Types where module Test.Database.Types where
...@@ -24,6 +25,7 @@ import Data.Map qualified as Map ...@@ -24,6 +25,7 @@ import Data.Map qualified as Map
import Data.Pool import Data.Pool
import Database.PostgreSQL.Simple qualified as PG import Database.PostgreSQL.Simple qualified as PG
import Database.Postgres.Temp qualified as Tmp import Database.Postgres.Temp qualified as Tmp
import GHC.IO.Exception (userError)
import Gargantext hiding (to) import Gargantext hiding (to)
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types
...@@ -33,13 +35,12 @@ import Gargantext.Core.Config.Mail (MailConfig(..), LoginType(NoAuth), SendEmail ...@@ -33,13 +35,12 @@ import Gargantext.Core.Config.Mail (MailConfig(..), LoginType(NoAuth), SendEmail
import Gargantext.Core.Mail.Types (HasMail(..)) import Gargantext.Core.Mail.Types (HasMail(..))
import Gargantext.Core.NLP (HasNLPServer(..)) import Gargantext.Core.NLP (HasNLPServer(..))
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.System.Logging (HasLogger(..), Logger, MonadLogger(..)) import Gargantext.System.Logging (HasLogger(..), Logger, MonadLogger(..), LogLevel(..))
import Gargantext.System.Logging.Loggers import Gargantext.System.Logging.Loggers
import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..)) import Gargantext.Utils.Jobs.Monad (MonadJobStatus(..))
import Network.URI (parseURI) import Network.URI (parseURI)
import Prelude qualified import Prelude qualified
import System.Log.FastLogger qualified as FL import System.Log.FastLogger qualified as FL
import GHC.IO.Exception (userError)
newtype Counter = Counter { _Counter :: IORef Int } newtype Counter = Counter { _Counter :: IORef Int }
...@@ -75,6 +76,19 @@ newtype TestMonadM env err a = TestMonad { _TestMonad :: ReaderT env IO a } ...@@ -75,6 +76,19 @@ newtype TestMonadM env err a = TestMonad { _TestMonad :: ReaderT env IO a }
, MonadThrow , MonadThrow
) )
instance HasLogger (TestMonadM TestEnv BackendInternalError) where
data instance Logger (TestMonadM TestEnv BackendInternalError) = TestLogger { _IOLogger :: IOStdLogger }
type instance LogInitParams (TestMonadM TestEnv BackendInternalError) = LogConfig
type instance LogPayload (TestMonadM TestEnv BackendInternalError) = Prelude.String
initLogger cfg = fmap TestLogger $ (liftIO $ ioStdLogger cfg)
destroyLogger = liftIO . _iosl_destroy . _IOLogger
logMsg (TestLogger ioLogger) lvl msg = liftIO $ _iosl_log_msg ioLogger lvl msg
logTxt (TestLogger ioLogger) lvl msg = liftIO $ _iosl_log_txt ioLogger lvl msg
instance MonadLogger (TestMonadM TestEnv BackendInternalError) where
getLogger = TestMonad $ do
initLogger @(TestMonadM TestEnv BackendInternalError) (LogConfig Nothing ERROR)
runTestMonadM :: env -> TestMonadM env err a -> IO a runTestMonadM :: env -> TestMonadM env err a -> IO a
runTestMonadM env = flip runReaderT env . _TestMonad runTestMonadM env = flip runReaderT env . _TestMonad
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment