Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
163
Issues
163
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
337d2af5
Verified
Commit
337d2af5
authored
Jun 09, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into 471-dev-node-multiterms
parents
b0913118
d19839d8
Pipeline
#7651
failed with stages
in 62 minutes and 31 seconds
Changes
16
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
289 additions
and
133 deletions
+289
-133
Import.hs
bin/gargantext-cli/CLI/Import.hs
+4
-3
gargantext.cabal
gargantext.cabal
+1
-0
Contact.hs
src/Gargantext/API/Node/Contact.hs
+2
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+3
-1
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+6
-0
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+6
-4
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+5
-3
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+6
-7
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+159
-39
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+45
-47
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+4
-4
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+21
-15
Contact.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Contact.hs
+1
-1
Node.hs
src/Gargantext/Database/Admin/Types/Node.hs
+1
-1
Insert.hs
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
+9
-5
Types.hs
test/Test/Database/Types.hs
+16
-2
No files found.
bin/gargantext-cli/CLI/Import.hs
View file @
337d2af5
...
...
@@ -19,6 +19,7 @@ module CLI.Import where
import
CLI.Parsers
import
CLI.Types
import
Control.Monad.Catch
(
MonadCatch
)
import
Gargantext.API.Admin.EnvTypes
(
DevEnv
(
..
),
DevJobHandle
(
..
))
import
Gargantext.API.Dev
(
withDevEnv
,
runCmdGargDev
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
...
...
@@ -42,14 +43,14 @@ importCLI (ImportArgs fun user name settingsPath corpusPath) = do
let
tt
=
Multi
EN
format
=
TsvGargV3
corpus
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
corpus
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
MonadCatch
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
mkCorpusUser
=
MkCorpusUserNormalCorpusName
(
UserName
$
cs
user
)
(
cs
name
::
Text
)
corpus
=
flowCorpusFile
mkCorpusUser
tt
format
Plain
corpusPath
Nothing
DevJobHandle
corpusTsvHal
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
corpusTsvHal
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
MonadCatch
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
corpusTsvHal
=
flowCorpusFile
mkCorpusUser
tt
TsvHal
Plain
corpusPath
Nothing
DevJobHandle
annuaire
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
annuaire
::
forall
m
.
(
FlowCmdM
DevEnv
BackendInternalError
m
,
MonadJobStatus
m
,
MonadCatch
m
,
JobHandle
m
~
DevJobHandle
)
=>
m
CorpusId
annuaire
=
flowAnnuaire
(
MkCorpusUserNormalCorpusName
(
UserName
$
cs
user
)
"Annuaire"
)
(
Multi
EN
)
corpusPath
DevJobHandle
withDevEnv
settingsPath
$
\
env
->
do
...
...
gargantext.cabal
View file @
337d2af5
...
...
@@ -702,6 +702,7 @@ executable gargantext
, containers ^>= 0.6.7
, cryptohash ^>= 0.11.9
, directory ^>= 1.3.7.1
, exceptions >= 0.9.0 && < 0.11
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
...
...
src/Gargantext/API/Node/Contact.hs
View file @
337d2af5
...
...
@@ -18,6 +18,7 @@ module Gargantext.API.Node.Contact
where
import
Conduit
(
yield
)
import
Control.Monad.Catch
(
MonadCatch
)
import
Gargantext.API.Admin.Auth.Types
(
AuthenticatedUser
(
AuthenticatedUser
)
)
import
Gargantext.API.Admin.EnvTypes
(
Env
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
...
...
@@ -57,7 +58,7 @@ apiAsync u nId = Named.ContactAsyncAPI {
,
_ac_user
=
u
}
}
addContact
::
(
FlowCmdM
env
err
m
,
MonadJobStatus
m
)
addContact
::
(
FlowCmdM
env
err
m
,
MonadJobStatus
m
,
MonadCatch
m
)
=>
User
->
NodeId
->
AddContactParams
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
337d2af5
...
...
@@ -24,6 +24,7 @@ module Gargantext.API.Node.Corpus.New
import
Conduit
((
.|
),
yieldMany
,
mapMC
,
transPipe
)
import
Control.Exception.Safe
(
MonadMask
)
import
Control.Lens
(
view
,
non
)
import
Control.Monad.Catch
(
MonadCatch
)
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Conduit.List
(
mapMaybeM
)
import
Data.Swagger
(
ToSchema
(
..
)
)
...
...
@@ -56,6 +57,7 @@ import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
import
Gargantext.Database.GargDB
qualified
as
GargDB
import
Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
,
getOrMkList
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
MkCorpusUserNormalCorpusIds
))
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
...
...
@@ -63,7 +65,6 @@ import Gargantext.Prelude
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
..
)
)
import
Gargantext.Utils.Jobs.Error
(
HumanFriendlyErrorText
(
..
))
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
..
))
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
------------------------------------------------------------------------
{-
...
...
@@ -150,6 +151,7 @@ type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
addToCorpusWithQuery
::
(
FlowCmdM
env
err
m
,
MonadJobStatus
m
,
MonadCatch
m
)
=>
User
->
CorpusId
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
View file @
337d2af5
...
...
@@ -14,6 +14,7 @@ Portability : POSIX
module
Gargantext.API.Node.Corpus.Searx
where
import
Control.Lens
(
view
)
import
Control.Monad.Catch
(
MonadCatch
)
import
Data.Aeson
qualified
as
Aeson
import
Data.Text
qualified
as
T
import
Data.Text
qualified
as
Text
...
...
@@ -40,6 +41,7 @@ import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNot
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Prelude
import
Gargantext.System.Logging
(
MonadLogger
)
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
..
))
import
Network.HTTP.Client
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
...
...
@@ -118,6 +120,8 @@ insertSearxResponse :: ( MonadBase IO m
,
HasNodeError
err
,
HasTreeError
err
,
HasValidationError
err
,
MonadCatch
m
,
MonadLogger
m
)
=>
User
->
CorpusId
...
...
@@ -155,6 +159,8 @@ triggerSearxSearch :: ( MonadBase IO m
,
HasTreeError
err
,
HasValidationError
err
,
MonadJobStatus
m
,
MonadCatch
m
,
MonadLogger
m
)
=>
User
->
CorpusId
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
337d2af5
...
...
@@ -16,6 +16,7 @@ Portability : POSIX
module
Gargantext.API.Node.DocumentUpload
where
import
Control.Lens
(
view
)
import
Control.Monad.Catch
(
MonadCatch
)
import
Data.Text
qualified
as
T
import
Gargantext.API.Admin.Auth.Types
(
AuthenticatedUser
(
..
))
import
Gargantext.API.Admin.EnvTypes
(
Env
)
...
...
@@ -32,16 +33,16 @@ import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv)
import
Gargantext.Core.Text.Corpus.Parsers.Date
(
mDateSplit
)
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Worker.Jobs.Types
qualified
as
Jobs
import
Gargantext.Core.Worker.Jobs.Types
(
WorkSplit
(
..
))
import
Gargantext.Core.Worker.Jobs.Types
qualified
as
Jobs
import
Gargantext.Database.Action.Flow
(
addDocumentsToHyperCorpus
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
DocId
,
NodeId
,
NodeType
(
NodeCorpus
),
ParentId
)
import
Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType'
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Schema.Node
(
_node_hyperdata
)
import
Gargantext.Prelude
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
..
),
MonadLogger
)
...
...
@@ -55,7 +56,7 @@ api nId = Named.DocumentUploadAPI {
,
_ud_node_id
=
nId
}
}
documentUploadAsync
::
(
FlowCmdM
env
err
m
,
MonadJobStatus
m
)
documentUploadAsync
::
(
FlowCmdM
env
err
m
,
MonadJobStatus
m
,
MonadCatch
m
)
=>
NodeId
->
DocumentUpload
->
JobHandle
m
...
...
@@ -66,7 +67,7 @@ documentUploadAsync nId doc jobHandle = do
-- printDebug "documentUploadAsync" docIds
markComplete
jobHandle
documentUpload
::
(
FlowCmdM
env
err
m
)
documentUpload
::
(
FlowCmdM
env
err
m
,
MonadCatch
m
)
=>
NodeId
->
DocumentUpload
->
m
[
DocId
]
...
...
@@ -110,6 +111,7 @@ remoteImportDocuments :: ( HasNodeError err
,
HasNodeStoryEnv
env
err
,
IsDBCmd
env
err
m
,
MonadLogger
m
,
MonadCatch
m
,
MonadIO
m
)
=>
AuthenticatedUser
->
ParentId
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
337d2af5
...
...
@@ -17,6 +17,8 @@ module Gargantext.API.Node.DocumentsFromWriteNodes
where
import
Conduit
(
yieldMany
)
import
Control.Lens
(
view
)
import
Control.Monad.Catch
(
MonadCatch
)
import
Data.List
qualified
as
List
import
Data.Text
qualified
as
T
import
Gargantext.API.Admin.Auth.Types
(
AuthenticatedUser
,
auth_node_id
,
auth_user_id
)
...
...
@@ -39,15 +41,14 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
(
HyperdataFrame
(
..
),
getHyperdataFrameContents
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
,
Node
,
NodeType
(
..
)
)
import
Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.Node
(
getChildrenByType
,
getClosestParentIdByType'
,
getNodeWith
,
getOrMkList
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
,
node_name
,
node_date
)
import
Gargantext.Database.Prelude
import
Gargantext.Prelude
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
..
))
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
(
..
))
import
Gargantext.Utils.Jobs.Error
(
HumanFriendlyErrorText
(
..
))
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
(
..
))
import
Servant.Server.Generic
(
AsServerT
)
import
Control.Lens
(
view
)
api
::
AuthenticatedUser
-- ^ The logged-in user
...
...
@@ -63,6 +64,7 @@ api authenticatedUser nId =
documentsFromWriteNodes
::
(
FlowCmdM
env
err
m
,
MonadJobStatus
m
,
MonadCatch
m
)
=>
AuthenticatedUser
-- ^ The logged-in user
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
337d2af5
...
...
@@ -116,13 +116,12 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgramsT
h
where
extractNgramsT
::
HasText
h
=>
NLPServerConfig
->
TermType
Lang
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
class
ExtractNgrams
h
where
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
l
pa
po
(
Terms
{
..
})
=
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
337d2af5
...
...
@@ -15,15 +15,17 @@ Portability : POSIX
-- TODO-EVENTS: InsertedNodes
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TupleSections #-}
module
Gargantext.Database.Action.Flow
-- (flowDatabase, ngrams2list)
(
DataText
(
..
)
...
...
@@ -54,7 +56,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import
Conduit
import
Control.Lens
(
to
,
view
)
import
Data.Bifunctor
qualified
as
B
import
Control.Monad.Catch
import
Data.Conduit
qualified
as
C
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Conduit.List
qualified
as
CL
...
...
@@ -67,12 +69,13 @@ import Data.Text qualified as T
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
)
import
Gargantext.Core
(
Lang
(
..
),
withDefaultLanguage
,
NLPServerConfig
)
import
Gargantext.Core.Notifications.CentralExchange.Types
(
HasCentralExchangeNotification
(
ce_notify
),
CEMessage
(
..
))
import
Gargantext.Core.Config
(
GargConfig
(
..
),
hasConfig
)
import
Gargantext.Core.Config.Types
(
APIsConfig
(
..
))
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.NLP
(
HasNLPServer
,
nlpServerGet
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
,
NodeStoryEnv
,
HasNodeStoryEnv
(
..
))
import
Gargantext.Core.Notifications.CentralExchange.Types
(
HasCentralExchangeNotification
(
ce_notify
),
CEMessage
(
..
))
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.Core.Text.Corpus.API
qualified
as
API
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
,
FileType
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
...
...
@@ -86,27 +89,28 @@ import Gargantext.Core.Types.Individu (User(..))
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
)
)
import
Gargantext.Database.Action.Flow.Extract
()
-- ExtractNgramsT instances
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
,
toNodeNgramsW'
)
import
Gargantext.Database.Action.Flow.Types
(
do_api
,
DataOrigin
(
..
),
DataText
(
..
),
FlowCorpus
)
import
Gargantext.Database.Action.Flow.Utils
(
documentIdWithNgrams
,
insertDocNgrams
,
insertDocs
,
mapNodeIdNgrams
,
ngramsByDoc
)
import
Gargantext.Database.Action.Flow.Types
(
do_api
,
DataOrigin
(
..
),
DataText
(
..
),
FlowCorpus
,
DocumentIdWithNgrams
(
..
)
)
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
,
insertDocs
,
mkNodeIdNgramsMap
,
ngramsByDoc
,
documentIdWithNgrams
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
,
updateContextScore
)
import
Gargantext.Database.Action.Search
(
searchDocInDatabase
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataAnnuaire
,
HyperdataCorpus
(
_hc_lang
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
ToHyperdataDocument
(
toHyperdataDocument
),
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Node
hiding
(
DEBUG
)
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Admin.Types.Node
hiding
(
ERROR
,
DEBUG
)
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
(
ContextNodeNgrams2Poly
(
..
),
insertContextNodeNgrams2
)
import
Gargantext.Database.Query.Table.Node
(
MkCorpus
,
insertDefaultNodeIfNotExists
,
getOrMkList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ToNode
(
toNode
)
)
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ToNode
(
toNode
)
,
UniqParameters
(
..
),
newUniqIdHash
)
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodesOnlyId
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
NgramsId
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
),
MonadLogger
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Types
import
Gargantext.Prelude
hiding
(
catch
,
onException
,
to
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
,
ERROR
),
MonadLogger
)
import
Gargantext.Utils.Jobs.Monad
(
JobHandle
,
MonadJobStatus
(
..
)
)
------------------------------------------------------------------------
...
...
@@ -168,6 +172,7 @@ flowDataText :: forall env err m.
,
HasTreeError
err
,
HasValidationError
err
,
MonadJobStatus
m
,
MonadCatch
m
,
HasCentralExchangeNotification
env
)
=>
User
...
...
@@ -202,6 +207,7 @@ flowAnnuaire :: ( IsDBCmd env err m
,
HasTreeError
err
,
HasValidationError
err
,
MonadJobStatus
m
,
MonadCatch
m
,
HasCentralExchangeNotification
env
)
=>
MkCorpusUser
->
TermType
Lang
...
...
@@ -221,6 +227,7 @@ flowCorpusFile :: ( IsDBCmd env err m
,
HasTreeError
err
,
HasValidationError
err
,
MonadJobStatus
m
,
MonadCatch
m
,
HasCentralExchangeNotification
env
)
=>
MkCorpusUser
->
TermType
Lang
...
...
@@ -250,7 +257,8 @@ flowCorpus :: ( IsDBCmd env err m
,
HasValidationError
err
,
FlowCorpus
a
,
MonadJobStatus
m
,
HasCentralExchangeNotification
env
)
,
MonadCatch
m
,
HasCentralExchangeNotification
env
,
Show
a
)
=>
MkCorpusUser
->
TermType
Lang
->
Maybe
FlowSocialListWith
...
...
@@ -271,6 +279,7 @@ flow :: forall env err m a c.
,
MkCorpus
c
,
MonadJobStatus
m
,
HasCentralExchangeNotification
env
,
MonadCatch
m
,
Show
a
)
=>
Maybe
c
->
MkCorpusUser
...
...
@@ -309,6 +318,8 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
,
HasNLPServer
env
,
FlowCorpus
document
,
MkCorpus
corpus
,
MonadLogger
m
,
MonadCatch
m
,
Show
document
)
=>
Maybe
corpus
->
TermType
Lang
...
...
@@ -318,7 +329,13 @@ addDocumentsToHyperCorpus :: ( IsDBCmd env err m
addDocumentsToHyperCorpus
mb_hyper
la
corpusId
docs
=
do
cfg
<-
view
hasConfig
nlp
<-
view
(
nlpServerGet
$
_tt_lang
la
)
ids
<-
insertMasterDocs
cfg
nlp
mb_hyper
la
docs
-- First extract all the ngrams for the input documents via the nlp server,
-- collect errors (if any) and pass to 'insertMasterDocs' only the documents
-- for which the ngrams extraction succeeded. At the moment errors are just
-- logged, but in the future they could be returned upstream so that we can
-- display a final result of how many were skipped, how many succeded etc.
uncommittedNgrams
<-
extractNgramsFromDocuments
nlp
la
docs
ids
<-
runDBTx
$
insertMasterDocs
cfg
uncommittedNgrams
mb_hyper
docs
runDBTx
$
do
void
$
Doc
.
add
corpusId
(
map
nodeId2ContextId
ids
)
pure
ids
...
...
@@ -413,44 +430,147 @@ buildSocialList l user userCorpusId listId ctype = \case
_mastListId
<-
runDBTx
$
getOrMkList
masterCorpusId
masterUserId
pure
()
-------------------------------------------------------------------------------
--
-- Splitting Ngrams extraction from document creation
--
-------------------------------------------------------------------------------
--
-- There is a bit of tension between extracting the Ngrams and creating the documents:
-- We need to produce a map between a given 'NodeId' and the ngrams associated with it, where
-- the latter are extract via the NLP server. However, each ngrams has to be matched to the
-- NodeId associated with the new resource being created as part of 'insertMasterDocs'. This
-- creates a bit of a chicken-and-egg problem in trying to make 'insertMasterDocs' a 'DBUpdate'
-- function: we need a 'NodeId' to exist by the time we call 'extractNgrams' but the latter can't
-- be execute in a pure fashion without a 'NodeId'.
--
-- To fix this, we need a data structure which would index the ngrams by some other notion of
-- index, and later have a transformation function which would re-index these ngrams to the actual
-- 'NodeId' created during the DB Transaction.
-- | Ngrams that have been fully \"committed\", i.e. associated to the respective document
-- where the latter has been persisted (i.e. committed) on secondary storage.
type
CommittedNgrams
=
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
newtype
DocumentHashId
=
DocumentHashId
{
_DocumentHashId
::
T
.
Text
}
deriving
stock
(
Show
,
Eq
)
deriving
newtype
Ord
-- | Ngrams that have been extracted from the input 'doc' but not fully associated with
-- a persisted entity on the database.
newtype
UncommittedNgrams
doc
=
UncommittedNgrams
{
_UncommittedNgrams
::
Map
.
Map
DocumentHashId
(
DocumentIdWithNgrams
DocumentHashId
doc
ExtractedNgrams
)
}
deriving
stock
Show
deriving
newtype
(
Semigroup
,
Monoid
)
data
InsertDocError
=
NgramsNotFound
!
(
Maybe
DocumentHashId
)
!
DocId
deriving
Show
extractNgramsFromDocument
::
(
UniqParameters
doc
,
HasText
doc
,
ExtractNgrams
doc
,
IsDBCmd
err
env
m
,
MonadLogger
m
,
MonadCatch
m
)
=>
NLPServerConfig
->
TermType
Lang
->
doc
->
m
(
UncommittedNgrams
doc
)
extractNgramsFromDocument
nlpServer
lang
doc
=
-- In case of an exception from the NLP server, treat this as having no ngrams,
-- but still index it in the final map, so that later reconciliation still works.
-- Pratically speaking it means this won't have any ngrams associated, but the document
-- will still be added to the corpus and we can try to regen the ngrams at a later stage.
UncommittedNgrams
.
Map
.
singleton
docId
<$>
(
documentIdWithNgrams
(
extractNgrams
nlpServer
$
withLang
lang
[
doc
])
(
Indexed
docId
doc
)
`
catch
`
\
(
e
::
SomeException
)
->
do
$
(
logLocM
)
ERROR
$
T
.
pack
$
"Document with hash "
<>
show
docId
<>
" failed ngrams extraction due to an exception: "
<>
displayException
e
pure
$
DocumentIdWithNgrams
(
Indexed
docId
doc
)
mempty
)
where
docId
=
DocumentHashId
$
newUniqIdHash
doc
commitNgramsForDocument
::
UniqParameters
doc
=>
UncommittedNgrams
doc
->
Indexed
ContextId
(
Node
doc
)
->
Either
InsertDocError
CommittedNgrams
commitNgramsForDocument
(
UncommittedNgrams
ng
)
(
Indexed
oldIx
node
)
=
do
docId
<-
mb_docId
case
Map
.
lookup
docId
ng
of
Nothing
->
Left
$
NgramsNotFound
(
Just
docId
)
(
_node_id
node
)
Just
ngs
->
Right
$
mkNodeIdNgramsMap
[
reIndex
ngs
]
where
mb_docId
=
case
DocumentHashId
<$>
_node_hash_id
node
of
Nothing
->
Left
$
NgramsNotFound
Nothing
(
_node_id
node
)
Just
dId
->
Right
dId
reIndex
::
DocumentIdWithNgrams
DocumentHashId
doc
ExtractedNgrams
->
DocumentIdWithNgrams
NodeId
doc
ExtractedNgrams
reIndex
did
=
let
(
Indexed
_
a
)
=
documentWithId
did
in
did
{
documentWithId
=
Indexed
(
contextId2NodeId
oldIx
)
a
}
extractNgramsFromDocuments
::
forall
doc
env
err
m
.
(
HasText
doc
,
UniqParameters
doc
,
ExtractNgrams
doc
,
IsDBCmd
env
err
m
,
MonadLogger
m
,
MonadCatch
m
)
=>
NLPServerConfig
->
TermType
Lang
->
[
doc
]
->
m
(
UncommittedNgrams
doc
)
extractNgramsFromDocuments
nlpServer
lang
docs
=
foldlM
go
mempty
docs
where
go
::
UncommittedNgrams
doc
->
doc
->
m
(
UncommittedNgrams
doc
)
go
!
acc
inputDoc
=
do
ngrams
<-
extractNgramsFromDocument
nlpServer
lang
inputDoc
pure
$
acc
<>
ngrams
commitNgramsForDocuments
::
UniqParameters
doc
=>
UncommittedNgrams
doc
->
[
Indexed
ContextId
(
Node
doc
)]
->
([
InsertDocError
],
CommittedNgrams
)
commitNgramsForDocuments
ng
nodes
=
let
(
errs
,
successes
)
=
partitionEithers
$
map
(
commitNgramsForDocument
ng
)
nodes
in
(
errs
,
mconcat
successes
)
-- FIME(adn): the use of 'extractNgramsT' is iffy and problematic -- we shouldn't
-- be contacting the NLP server in the middle of some DB ops! we should extract
-- the tokens /before/ inserting things into the DB.
insertMasterDocs
::
(
HasNodeError
err
,
FlowCorpus
a
,
MkCorpus
c
,
IsDBCmd
env
err
m
,
UniqParameters
doc
,
FlowCorpus
do
c
,
MkCorpus
c
,
Show
doc
)
=>
GargConfig
->
NLPServerConfig
->
UncommittedNgrams
doc
-- ^ The ngrams extracted for /all/ the documents
-- and indexed by the hash of the given document.
-- We can use this map to associate the document
-- with the node being created.
->
Maybe
c
->
TermType
Lang
->
[
a
]
->
m
[
DocId
]
insertMasterDocs
cfg
nlpServer
c
lang
hs
=
do
(
masterUserId
,
masterCorpusId
,
documentsWithId
,
ids'
)
<-
runDBTx
$
do
(
master_user_id
,
_
,
master_corpus_id
)
<-
getOrMkRootWithCorpus
cfg
MkCorpusUserMaster
c
(
ids_prime
,
documents_with_id
)
<-
insertDocs
master_user_id
master_corpus_id
(
map
(
toNode
master_user_id
Nothing
)
hs
)
_
<-
Doc
.
add
master_corpus_id
ids_prime
pure
(
master_user_id
,
master_corpus_id
,
documents_with_id
,
ids_prime
)
->
[
doc
]
->
DBUpdate
err
[
DocId
]
insertMasterDocs
cfg
uncommittedNgrams
c
hs
=
do
(
masterUserId
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
cfg
MkCorpusUserMaster
c
(
ids'
,
documentsWithId
)
<-
insertDocs
masterUserId
masterCorpusId
(
map
(
toNode
masterUserId
Nothing
)
hs
)
_
<-
Doc
.
add
masterCorpusId
ids'
-- TODO
-- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
nlpServer
$
withLang
lang
documentsWithId
)
(
map
(
B
.
first
contextId2NodeId
)
documentsWithId
)
let
(
_failedExtraction
,
ngramsDocsMap
)
=
commitNgramsForDocuments
uncommittedNgrams
documentsWithId
runDBTx
$
do
lId
<-
getOrMkList
masterCorpusId
masterUserId
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
pure
$
map
contextId2NodeId
ids'
lId
<-
getOrMkList
masterCorpusId
masterUserId
_
<-
saveDocNgramsWith
lId
ngramsDocsMap
pure
$
map
contextId2NodeId
ids'
saveDocNgramsWith
::
ListId
...
...
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
337d2af5
...
...
@@ -25,7 +25,7 @@ import Gargantext.Core (Lang, NLPServerConfig(server))
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
splitOn
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
T
,
ExtractedNgrams
(
..
),
TermType
,
cleanExtractedNgrams
,
enrichedTerms
,
extractNgramsT
,
extractTerms
,
tt_lang
)
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
(
..
),
ExtractedNgrams
(
..
),
TermType
,
cleanExtractedNgrams
,
enrichedTerms
,
extractTerms
,
tt_lang
)
import
Gargantext.Core.Types
(
POS
(
NP
),
TermsCount
,
TermsWeight
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
...
...
@@ -39,59 +39,57 @@ import Gargantext.Prelude
------------------------------------------------------------------------
instance
ExtractNgramsT
HyperdataContact
where
extractNgramsT
_ncs
l
hc
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extract
l
hc
where
extract
::
TermType
Lang
->
HyperdataContact
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
instance
ExtractNgrams
HyperdataContact
where
extractNgrams
_ncs
_l
=
pure
.
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
.
extract
where
extract
::
HyperdataContact
->
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
)
extract
hc'
=
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
in
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance
ExtractNgramsT
HyperdataDocument
where
extractNgramsT
::
NLPServerConfig
->
TermType
Lang
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
doc
^.
hd_source
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
Institutes
(
doc
^.
hd_bdd
))
$
doc
^.
hd_institutes
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
Authors
(
doc
^.
hd_bdd
))
$
doc
^.
hd_authors
termsWithCounts'
::
[(
NgramsPostag
,
TermsCount
)]
<-
map
(
first
(
enrichedTerms
(
lang
^.
tt_lang
)
(
server
ncs
)
NP
))
.
concat
<$>
liftBase
(
extractTerms
ncs
lang
$
hasText
doc
)
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
source
,
(
DM
.
singleton
Sources
1
,
1
))
]
<>
[(
SimpleNgrams
i'
,
(
DM
.
singleton
Institutes
1
,
1
))
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
instance
(
ExtractNgramsT
a
,
HasText
a
)
=>
ExtractNgramsT
(
Node
a
)
instance
ExtractNgrams
HyperdataDocument
where
extractNgrams
::
NLPServerConfig
->
TermType
Lang
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgrams
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeight
,
TermsCount
))
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
doc
^.
hd_source
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
Institutes
(
doc
^.
hd_bdd
))
$
doc
^.
hd_institutes
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
Authors
(
doc
^.
hd_bdd
))
$
doc
^.
hd_authors
termsWithCounts'
::
[(
NgramsPostag
,
TermsCount
)]
<-
map
(
first
(
enrichedTerms
(
lang
^.
tt_lang
)
(
server
ncs
)
NP
))
.
concat
<$>
liftBase
(
extractTerms
ncs
lang
$
hasText
doc
)
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
source
,
(
DM
.
singleton
Sources
1
,
1
))
]
<>
[(
SimpleNgrams
i'
,
(
DM
.
singleton
Institutes
1
,
1
))
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
DM
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCounts'
]
instance
(
ExtractNgrams
a
,
HasText
a
)
=>
ExtractNgrams
(
Node
a
)
where
extractNgrams
T
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgramsT
ncs
l
h
extractNgrams
ncs
l
(
Node
{
_node_hyperdata
=
h
})
=
extractNgrams
ncs
l
h
instance
HasText
a
=>
HasText
(
Node
a
)
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
337d2af5
...
...
@@ -25,7 +25,7 @@ import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
T
)
import
Gargantext.Core.Text.Terms
(
ExtractNgrams
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
...
...
@@ -52,7 +52,7 @@ type FlowCmdM env err m =
type
FlowCorpus
a
=
(
UniqParameters
a
,
InsertDb
a
,
ExtractNgrams
T
a
,
ExtractNgrams
a
,
HasText
a
,
ToNode
a
,
ToJSON
a
...
...
@@ -66,9 +66,9 @@ type FlowInsertDB a = ( AddUniqId a
data
DocumentIdWithNgrams
a
b
=
data
DocumentIdWithNgrams
ix
a
b
=
DocumentIdWithNgrams
{
documentWithId
::
Indexed
NodeId
a
{
documentWithId
::
Indexed
ix
a
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
TermsWeight
,
TermsCount
)
}
deriving
(
Show
)
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
337d2af5
...
...
@@ -13,9 +13,10 @@ module Gargantext.Database.Action.Flow.Utils
(
docNgrams
,
docNgrams'
,
documentIdWithNgrams
,
mapDocumentIdWithNgrams
,
insertDocNgrams
,
insertDocs
,
m
apNodeIdNgrams
,
m
kNodeIdNgramsMap
,
ngramsByDoc
)
where
...
...
@@ -39,7 +40,6 @@ import Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.ContextNodeNgrams
(
ContextNodeNgramsPoly
(
..
),
insertContextNodeNgrams
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ReturnId
,
addUniqId
,
insertDb
,
reId
,
reInserted
,
reUniqId
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Schema.Context
(
context_oid_hyperdata
,
context_oid_id
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTypeId
(
..
),
text2ngrams
)
import
Gargantext.Database.Types
(
Indexed
(
..
),
index
)
...
...
@@ -94,34 +94,40 @@ docNgrams' lang ts txt =
termsInText
lang
(
buildPatternsWith
lang
ts
)
txt
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
DBCmd
err
(
HashMap
.
HashMap
b
(
Map
NgramsType
TermsWeight
,
TermsCount
))
)
->
[
Indexed
NodeId
a
]
->
DBCmd
err
[
DocumentIdWithNgrams
a
b
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
documentIdWithNgrams
::
Monad
m
=>
(
a
->
m
(
HashMap
.
HashMap
b
(
Map
NgramsType
TermsWeight
,
TermsCount
))
)
->
Indexed
ix
a
->
m
(
DocumentIdWithNgrams
ix
a
b
)
documentIdWithNgrams
f
=
toDocumentIdWithNgrams
where
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
_unIndex
d
pure
$
DocumentIdWithNgrams
{
documentWithId
=
d
,
documentNgrams
=
e
}
mapDocumentIdWithNgrams
::
Monad
m
=>
(
a
->
m
(
HashMap
.
HashMap
b
(
Map
NgramsType
TermsWeight
,
TermsCount
))
)
->
[
Indexed
ix
a
]
->
m
[
DocumentIdWithNgrams
ix
a
b
]
mapDocumentIdWithNgrams
f
=
mapM
(
documentIdWithNgrams
f
)
-- | TODO check optimization
mapNodeIdNgrams
::
(
Ord
b
,
Hashable
b
)
=>
[
DocumentIdWithNgrams
a
b
]
-- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list.
-- TODO check optimization
mkNodeIdNgramsMap
::
forall
ix
a
b
.
(
Ord
b
,
Hashable
b
,
Ord
ix
)
=>
[
DocumentIdWithNgrams
ix
a
b
]
->
HashMap
.
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
))
(
Map
ix
(
TermsWeight
,
TermsCount
))
)
m
apNodeIdNgrams
=
HashMap
.
unionsWith
(
DM
.
unionWith
(
DM
.
unionWith
addTuples
))
.
fmap
f
m
kNodeIdNgramsMap
=
HashMap
.
unionsWith
(
DM
.
unionWith
(
DM
.
unionWith
addTuples
))
.
fmap
f
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f
::
DocumentIdWithNgrams
a
b
->
HashMap
.
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
f
::
DocumentIdWithNgrams
ix
a
b
->
HashMap
.
HashMap
b
(
Map
NgramsType
(
Map
ix
(
TermsWeight
,
TermsCount
)))
f
d
=
fmap
(
\
(
ngramsTypeMap
,
cnt
)
->
fmap
(
\
w
->
DM
.
singleton
nId
(
w
,
cnt
))
ngramsTypeMap
)
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Contact.hs
View file @
337d2af5
...
...
@@ -43,7 +43,7 @@ instance NFData HyperdataContact where
instance
HasText
HyperdataContact
where
hasText
=
undefined
hasText
=
mempty
defaultHyperdataContact
::
HyperdataContact
defaultHyperdataContact
=
...
...
src/Gargantext/Database/Admin/Types/Node.hs
View file @
337d2af5
...
...
@@ -292,7 +292,7 @@ instance ToSchema NodeId
-- | An identifier for a 'Context' in gargantext.
newtype
ContextId
=
UnsafeMkContextId
{
_ContextId
::
Int
}
deriving
stock
(
Show
,
Eq
,
Ord
,
Generic
)
deriving
newtype
(
Csv
.
ToField
,
ToJSONKey
,
FromJSONKey
,
ToJSON
,
FromJSON
,
ToField
,
ToSchema
)
deriving
newtype
(
Csv
.
ToField
,
ToJSONKey
,
FromJSONKey
,
ToJSON
,
FromJSON
,
ToField
,
ToSchema
,
Hashable
)
deriving
anyclass
ToExpr
deriving
FromField
via
NodeId
...
...
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
View file @
337d2af5
...
...
@@ -72,7 +72,7 @@ import Gargantext.Database.Prelude
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Prelude
hiding
(
hash
,
toLower
)
import
Gargantext.Prelude.Crypto.Hash
(
hash
)
import
Gargantext.Prelude.Crypto.Hash
(
hash
,
Hash
)
{-| To Print result query
import Data.ByteString.Internal (ByteString)
...
...
@@ -221,9 +221,9 @@ instance UniqParameters HyperdataContact
where
uniqParameters
_
=
""
instance
UniqParameters
(
Node
a
)
instance
UniqParameters
a
=>
UniqParameters
(
Node
a
)
where
uniqParameters
_
=
undefined
uniqParameters
=
uniqParameters
.
_node_hyperdata
filterText
::
Text
->
Text
...
...
@@ -232,9 +232,13 @@ filterText = DT.toLower . DT.filter isAlphaNum
instance
(
UniqParameters
a
,
ToJSON
a
,
HasDBid
NodeType
)
=>
AddUniqId
(
Node
a
)
where
addUniqId
(
Node
nid
_
t
u
p
n
d
h
)
=
Node
nid
(
Just
newHash
)
t
u
p
n
d
h
addUniqId
node
=
node
{
_node_hash_id
=
Just
$
newUniqIdHash
node
}
where
newHash
=
"
\\
x"
<>
hash
(
uniqParameters
h
)
-- | Returns a new unique ID computed by hashing the uniq parameters of the input
-- and prefixing everything with '\\x'.
newUniqIdHash
::
UniqParameters
a
=>
a
->
Hash
newUniqIdHash
a
=
"
\\
x"
<>
hash
(
uniqParameters
a
)
---------------------------------------------------------------------------
...
...
test/Test/Database/Types.hs
View file @
337d2af5
...
...
@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-}
{-# LANGUAGE TypeApplications #-}
module
Test.Database.Types
where
...
...
@@ -24,6 +25,7 @@ import Data.Map qualified as Map
import
Data.Pool
import
Database.PostgreSQL.Simple
qualified
as
PG
import
Database.Postgres.Temp
qualified
as
Tmp
import
GHC.IO.Exception
(
userError
)
import
Gargantext
hiding
(
to
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Errors.Types
...
...
@@ -33,13 +35,12 @@ import Gargantext.Core.Config.Mail (MailConfig(..), LoginType(NoAuth), SendEmail
import
Gargantext.Core.Mail.Types
(
HasMail
(
..
))
import
Gargantext.Core.NLP
(
HasNLPServer
(
..
))
import
Gargantext.Core.NodeStory
import
Gargantext.System.Logging
(
HasLogger
(
..
),
Logger
,
MonadLogger
(
..
))
import
Gargantext.System.Logging
(
HasLogger
(
..
),
Logger
,
MonadLogger
(
..
)
,
LogLevel
(
..
)
)
import
Gargantext.System.Logging.Loggers
import
Gargantext.Utils.Jobs.Monad
(
MonadJobStatus
(
..
))
import
Network.URI
(
parseURI
)
import
Prelude
qualified
import
System.Log.FastLogger
qualified
as
FL
import
GHC.IO.Exception
(
userError
)
newtype
Counter
=
Counter
{
_Counter
::
IORef
Int
}
...
...
@@ -75,6 +76,19 @@ newtype TestMonadM env err a = TestMonad { _TestMonad :: ReaderT env IO a }
,
MonadThrow
)
instance
HasLogger
(
TestMonadM
TestEnv
BackendInternalError
)
where
data
instance
Logger
(
TestMonadM
TestEnv
BackendInternalError
)
=
TestLogger
{
_IOLogger
::
IOStdLogger
}
type
instance
LogInitParams
(
TestMonadM
TestEnv
BackendInternalError
)
=
LogConfig
type
instance
LogPayload
(
TestMonadM
TestEnv
BackendInternalError
)
=
Prelude
.
String
initLogger
cfg
=
fmap
TestLogger
$
(
liftIO
$
ioStdLogger
cfg
)
destroyLogger
=
liftIO
.
_iosl_destroy
.
_IOLogger
logMsg
(
TestLogger
ioLogger
)
lvl
msg
=
liftIO
$
_iosl_log_msg
ioLogger
lvl
msg
logTxt
(
TestLogger
ioLogger
)
lvl
msg
=
liftIO
$
_iosl_log_txt
ioLogger
lvl
msg
instance
MonadLogger
(
TestMonadM
TestEnv
BackendInternalError
)
where
getLogger
=
TestMonad
$
do
initLogger
@
(
TestMonadM
TestEnv
BackendInternalError
)
(
LogConfig
Nothing
ERROR
)
runTestMonadM
::
env
->
TestMonadM
env
err
a
->
IO
a
runTestMonadM
env
=
flip
runReaderT
env
.
_TestMonad
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment