[flow] more massaging of Flow.hs file, refactorings

parent c5587f20
......@@ -23,9 +23,10 @@ import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
import Gargantext.API.Errors.Types
import Gargantext.API.Node () -- instances
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Prelude
......
......@@ -15,11 +15,11 @@ Import a corpus binary.
module Main where
import Data.List.NonEmpty qualified as NE
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Errors.Types
import Gargantext.API.Node () -- instances only
import Gargantext.Core.Types.Individu (User(..), arbitraryNewUsers, NewUser(..), arbitraryUsername, GargPassword(..))
import Gargantext.Database.Action.Flow (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Trigger.Init (initFirstTriggers, initLastTriggers)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
......@@ -27,9 +27,9 @@ import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, DBCmd)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.User (insertNewUsers, )
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import qualified Data.List.NonEmpty as NE
main :: IO ()
......
......@@ -42,10 +42,11 @@ import Gargantext.Core (Lang(..), withDefaultLanguage, defaultLanguage)
import Gargantext.Core.NodeStory (HasNodeStoryImmediateSaver, HasNodeArchiveStoryImmediateSaver, currentVersion, NgramsStatePatch')
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers qualified as Parser (FileType(..), parseFormatC)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText, TermType(..){-, allDataOrigins-})
import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Database.Action.Node (mkNodeWithParent)
......
......@@ -21,7 +21,7 @@ import Data.Swagger
import Data.Text qualified as T
import Gargantext.API.Admin.Orchestrator.Types qualified as Types
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..))
import Gargantext.Database.Action.Flow.Types (DataOrigin(..))
import Gargantext.Prelude
import Test.QuickCheck
......
......@@ -36,8 +36,8 @@ import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Flow (flowDataText)
import Gargantext.Database.Action.Flow.Types (DataText(..), FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
......
......@@ -24,8 +24,7 @@ Portability : POSIX
{-# LANGUAGE TypeOperators #-}
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
( DataText(..)
, getDataText
( getDataText
, getDataText_Debug
, flowDataText
, flow
......@@ -34,28 +33,15 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, flowCorpus
, flowCorpusUser
, flowAnnuaire
, insertMasterDocs
, saveDocNgramsWith
, addDocumentsToHyperCorpus
, reIndexWith
, getOrMkRoot
, getOrMk_RootWithCorpus
, TermType(..)
, DataOrigin(..)
, allDataOrigins
, do_api
)
where
import Conduit
import Control.Lens hiding (elements, Indexed)
import Data.Bifunctor qualified as B
import Data.Conduit qualified as C
import Conduit (ConduitT, (.|), mapC, mapM_C, runConduit, sinkNull, transPipe, yieldMany)
import Control.Lens ((^.), over, view)
import Data.Conduit.Internal (zipSources)
import Data.Conduit.List qualified as CL
import Data.Conduit.List qualified as CList
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
......@@ -72,38 +58,31 @@ import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms (ExtractedNgrams(SimpleNgrams), TermType(..), tt_lang)
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Types (HasValidationError)
import Gargantext.Core.Types.Individu (User(UserName))
import Gargantext.Core.Types.Main (CorpusName, ListType(MapTerm))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.Utils (docNgrams, documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (buildSocialList, createNodes, docNgrams, insertMasterDocs, saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (DataOrigin(..), DataText(..), FlowCorpus, printDataText)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node hiding (DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAnnuaire, HyperdataContact, HyperdataCorpus, hc_lang, toHyperdataDocument)
import Gargantext.Database.Admin.Types.Node hiding (DEBUG)
import Gargantext.Database.Prelude (DbCmd', DBCmd, hasConfig)
import Gargantext.Database.Query.Table.ContextNodeNgrams2
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Ngrams (NgramsType(NgramsTerms), text2ngrams)
import Gargantext.Database.Query.Table.Node (MkCorpus, getNodeWith)
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..))
import Gargantext.System.Logging
import Gargantext.System.Logging (LogLevel(DEBUG), MonadLogger, logLocM)
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import PUBMED.Types qualified as PUBMED
......@@ -111,20 +90,8 @@ import PUBMED.Types qualified as PUBMED
-- Imports for upgrade function
import Gargantext.Database.Query.Tree (HasTreeError)
------------------------------------------------------------------------
allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs
---------------
-- Show instance is not possible because of IO
printDataText :: DataText -> IO ()
printDataText (DataOld xs) = putText $ show xs
printDataText (DataNew (maybeInt, conduitData)) = do
res <- C.runConduit (conduitData .| CL.consume)
putText $ show (maybeInt, res)
-- TODO use the split parameter in config file
getDataText :: (HasNodeError err)
=> DataOrigin
......@@ -314,29 +281,6 @@ addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
pure ids
------------------------------------------------------------------------
createNodes :: ( DbCmd' env err m, HasNodeError err
, MkCorpus c
)
=> User
-> Either CorpusName [CorpusId]
-> Maybe c
-> m (UserId, CorpusId, ListId)
createNodes user corpusName ctype = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first
_tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
-- printDebug "NodeTexts: " tId
-- NodeList is second
listId <- getOrMkList userCorpusId userId
-- User Graph Flow
_ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
-- _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
pure (userId, userCorpusId, listId)
flowCorpusUser :: ( HasNodeError err
, HasValidationError err
......@@ -366,106 +310,6 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do
pure userCorpusId
buildSocialList :: ( HasNodeError err
, HasValidationError err
, HasNLPServer env
, HasTreeError err
, HasNodeStory env err m
, MkCorpus c
)
=> Lang
-> User
-> CorpusId
-> ListId
-> Maybe c
-> Maybe FlowSocialListWith
-> m ()
buildSocialList _l _user _userCorpusId _listId _ctype (Just (NoList _)) = pure ()
buildSocialList l user userCorpusId listId ctype mfslw = do
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
nlpServer <- view (nlpServerGet l)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw
$ GroupWithPosTag l nlpServer HashMap.empty
-- printDebug "flowCorpusUser:ngs" ngs
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
pure ()
insertMasterDocs :: ( DbCmd' env err m
, HasNodeError err
, FlowCorpus a
, MkCorpus c
)
=> NLPServerConfig
-> Maybe c
-> TermType Lang
-> [a]
-> m [DocId]
insertMasterDocs ncs c lang hs = do
(masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left corpusMasterName) c
(ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs )
_ <- Doc.add masterCorpusId ids'
-- TODO
-- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT ncs $ withLang lang documentsWithId)
(map (B.first contextId2NodeId) documentsWithId)
lId <- getOrMkList masterCorpusId masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure $ map contextId2NodeId ids'
saveDocNgramsWith :: (DbCmd' env err m)
=> ListId
-> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
let mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocsNoCount
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
-- new
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let ngrams2insert = catMaybes [ ContextNodeNgrams2 <$> Just (nodeId2ContextId nId)
<*> (getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms''))
<*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight
]
-- printDebug "Ngrams2Insert" ngrams2insert
_return <- insertContextNodeNgrams2 ngrams2insert
-- to be removed
_ <- insertDocNgrams lId $ HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
pure ()
------------------------------------------------------------------------
......@@ -480,7 +324,7 @@ reIndexWith :: ( HasNodeStory env err m )
reIndexWith cId lId nt lts = do
-- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
corpus_node <- getNodeWith cId (Proxy @HyperdataCorpus)
let corpusLang = withDefaultLanguage $ view (node_hyperdata . to _hc_lang) corpus_node
let corpusLang = withDefaultLanguage $ corpus_node ^. (node_hyperdata . hc_lang)
-- Getting [NgramsTerm]
ts <- List.concat
......
......@@ -18,17 +18,19 @@ Portability : POSIX
module Gargantext.Database.Action.Flow.Types
where
import Conduit (ConduitT)
import Conduit (ConduitT, (.|))
import Control.Lens (makeLenses)
import Data.Aeson (ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Conduit qualified as C
import Data.Conduit.List qualified as CL
import Data.HashMap.Strict (HashMap)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
import Gargantext.Core.Flow.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Text
import Gargantext.Core.Flow.Types (UniqId)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text (HasText)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms (ExtractNgramsT)
import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
......@@ -40,7 +42,7 @@ import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Types (Indexed)
import Gargantext.Prelude
import Gargantext.System.Logging
import Gargantext.System.Logging (MonadLogger)
type FlowCmdM env err m =
......@@ -88,6 +90,27 @@ deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
data DataText = DataOld ![NodeId]
| DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
--- | DataNew ![[HyperdataDocument]]
-- Show instance is not possible because of IO
printDataText :: DataText -> IO ()
printDataText (DataOld xs) = putText $ show xs
printDataText (DataNew (maybeInt, conduitData)) = do
res <- C.runConduit (conduitData .| CL.consume)
putText $ show (maybeInt, res)
------------------------------------------------------------------------
-- Unused functions
-- allDataOrigins :: [DataOrigin]
-- allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs
......@@ -7,77 +7,140 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This file mostly contains code that is used by Gargantext.Database.Flow.
We want to kepp the G.D.Flow file clean, as it's imported by other
modules and the flow logic is complex.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Database.Action.Flow.Utils
( docNgrams
, documentIdWithNgrams
, insertDocNgrams
, insertDocs
, mapNodeIdNgrams )
( buildSocialList
, createNodes
, docNgrams
, insertMasterDocs
, saveDocNgramsWith )
where
import Control.Lens ((^.))
import Control.Lens ((^.), over, view)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as DM
import Data.Text qualified as T
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core (Lang, toDBid)
import Gargantext.Core (Lang, NLPServerConfig, toDBid)
import Gargantext.Core.Flow.Types (UniqId, uniqId)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams(GroupWithPosTag))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Terms (ExtractedNgrams, TermType, extracted2ngrams, extractNgramsT, insertExtractedNgrams, withLang)
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Types.Individu (User(UserName))
import Gargantext.Core.Types.Main (CorpusName)
import Gargantext.Core.Utils (addTuples)
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB)
import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List (flowList_DbRepo, toNodeNgramsW')
import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowCorpus, FlowInsertDB)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument, hd_abstract, hd_title)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node (Context, ContextId, CorpusId, DocId, ListId, NodeId, NodeType(NodeGraph, NodeTexts), UserId, contextId2NodeId, nodeId2ContextId)
import Gargantext.Database.Prelude (DBCmd, DbCmd')
import Gargantext.Database.Query.Table.ContextNodeNgrams
import Gargantext.Database.Query.Table.ContextNodeNgrams (ContextNodeNgramsPoly(ContextNodeNgrams), insertContextNodeNgrams)
import Gargantext.Database.Query.Table.ContextNodeNgrams2 (insertContextNodeNgrams2)
import Gargantext.Database.Query.Table.Node (MkCorpus, getOrMkList, insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId, toNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeNgrams (getCgramsId, listInsertDb)
import Gargantext.Database.Query.Tree (HasTreeError)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Context (context_hyperdata, context_id)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Types
import Gargantext.Database.Schema.ContextNodeNgrams2 (ContextNodeNgrams2Poly(ContextNodeNgrams2))
import Gargantext.Database.Schema.Ngrams (Ngrams, NgramsId, NgramsType, NgramsTypeId(..), indexNgrams, _ngramsTerms)
import Gargantext.Database.Types (Indexed(..), index, unIndex)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
insertDocNgrams :: ListId
-> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
-> DBCmd err Int
insertDocNgrams lId m = do
-- printDebug "[insertDocNgrams] ns" ns
insertContextNodeNgrams ns
where
ns = [ ContextNodeNgrams (nodeId2ContextId docId)
lId (ng^.index)
(NgramsTypeId $ toDBid t)
(fromIntegral i)
cnt
| (ng, t2n2i) <- HashMap.toList m
, (t, n2i) <- DM.toList t2n2i
, (docId, (i, cnt)) <- DM.toList n2i
]
createNodes :: ( DbCmd' env err m, HasNodeError err
, MkCorpus c
)
=> User
-> Either CorpusName [CorpusId]
-> Maybe c
-> m (UserId, CorpusId, ListId)
createNodes user corpusName ctype = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first
_tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
-- printDebug "NodeTexts: " tId
-- NodeList is second
listId <- getOrMkList userCorpusId userId
-- User Graph Flow
_ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
-- _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
pure (userId, userCorpusId, listId)
-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
saveDocNgramsWith :: (DbCmd' env err m)
=> ListId
-> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
let mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocsNoCount
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
-- new
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second DM.keys)
$ HashMap.toList mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let ngrams2insert = catMaybes [ ContextNodeNgrams2 <$> Just (nodeId2ContextId nId)
<*> (getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms''))
<*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- DM.toList mapNgramsTypes
, (nId, (w, _cnt)) <- DM.toList mapNodeIdWeight
]
-- printDebug "Ngrams2Insert" ngrams2insert
_return <- insertContextNodeNgrams2 ngrams2insert
-- to be removed
_ <- insertDocNgrams lId $ HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
pure ()
docNgrams :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
-> Context HyperdataDocument
-> [((MatchedText, TermsCount),
Map NgramsType (Map NodeId Int))]
docNgrams lang nt ts doc =
......@@ -90,38 +153,6 @@ docNgrams lang nt ts doc =
)
(List.cycle [DM.fromList $ [(nt, DM.singleton (doc ^. context_id) 1 )]])
documentIdWithNgrams :: HasNodeError err
=> (a
-> DBCmd err (HashMap.HashMap b (Map NgramsType Int, TermsCount)))
-> [Indexed NodeId a]
-> DBCmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f $ _unIndex d
pure $ DocumentIdWithNgrams d e
-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b]
-> HashMap.HashMap b
(Map NgramsType
(Map NodeId (Int, TermsCount))
)
mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> DM.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
where
nId = _index $ documentWithId d
-- TODO Type NodeDocumentUnicised
insertDocs :: ( DbCmd' env err m
......@@ -144,7 +175,96 @@ insertDocs uId cId hs = do
pure (newIds', map (first nodeId2ContextId) documentsWithId)
insertMasterDocs :: ( DbCmd' env err m
, HasNodeError err
, FlowCorpus a
, MkCorpus c
)
=> NLPServerConfig
-> Maybe c
-> TermType Lang
-> [a]
-> m [DocId]
insertMasterDocs ncs c lang hs = do
(masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left corpusMasterName) c
(ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs )
_ <- Doc.add masterCorpusId ids'
-- TODO
-- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT ncs $ withLang lang documentsWithId)
(map (first contextId2NodeId) documentsWithId)
lId <- getOrMkList masterCorpusId masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure $ map contextId2NodeId ids'
buildSocialList :: ( HasNodeError err
, HasValidationError err
, HasNLPServer env
, HasTreeError err
, HasNodeStory env err m
, MkCorpus c
)
=> Lang
-> User
-> CorpusId
-> ListId
-> Maybe c
-> Maybe FlowSocialListWith
-> m ()
buildSocialList _l _user _userCorpusId _listId _ctype (Just (NoList _)) = pure ()
buildSocialList l user userCorpusId listId ctype mfslw = do
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
nlpServer <- view (nlpServerGet l)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw
$ GroupWithPosTag l nlpServer HashMap.empty
-- printDebug "flowCorpusUser:ngs" ngs
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
pure ()
------------------------------------------------------------------------
-- INTERNAL FUNCTIONS
insertDocNgrams :: ListId
-> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
-> DBCmd err Int
insertDocNgrams lId m = do
-- printDebug "[insertDocNgrams] ns" ns
insertContextNodeNgrams ns
where
ns = [ ContextNodeNgrams (nodeId2ContextId docId)
lId (ng^.index)
(NgramsTypeId $ toDBid t)
(fromIntegral i)
cnt
| (ng, t2n2i) <- HashMap.toList m
, (t, n2i) <- DM.toList t2n2i
, (docId, (i, cnt)) <- DM.toList n2i
]
viewUniqId' :: UniqId a
=> a
-> (Hash, a)
......@@ -173,6 +293,41 @@ toInserted =
documentIdWithNgrams :: HasNodeError err
=> (a
-> DBCmd err (HashMap.HashMap b (Map NgramsType Int, TermsCount)))
-> [Indexed NodeId a]
-> DBCmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f (d ^. unIndex)
pure $ DocumentIdWithNgrams d e
-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b]
-> HashMap.HashMap b
(Map NgramsType
(Map NodeId (Int, TermsCount))
)
mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> DM.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
where
nId = (documentWithId d) ^. index
-- Apparently unused functions
......
......@@ -27,20 +27,20 @@ module Gargantext.Database.Action.User.New
import Control.Lens (view)
import Control.Monad.Random
import Data.List.NonEmpty qualified as NE
import Data.Text (splitOn)
import Data.Text qualified as Text
import Gargantext.Core.Mail
import Gargantext.Core.Mail.Types (HasMail, mailSettings)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow (getOrMkRoot)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, DBCmd, CmdM)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..), nodeError, NodeError(..))
import Gargantext.Database.Query.Table.User
import Gargantext.Database.Query.Tree.Root (getOrMkRoot)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Pass.User (gargPass)
import Gargantext.Prelude.Mail.Types (MailConfig)
import qualified Data.List.NonEmpty as NE
------------------------------------------------------------------------
-- | Creates a new 'User' from the input 'EmailAddress', which needs to
......
......@@ -16,32 +16,31 @@ import Gargantext.API.Prelude
import Gargantext.Core.NLP
import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow
import Gargantext.Database.Action.User.New
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Trigger.Init
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node (getOrMkList)
-- import Gargantext.Prelude (printDebug)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
import Gargantext.Prelude.Config
import Gargantext.Prelude.Mail qualified as Mail
import Gargantext.Prelude.NLP qualified as NLP
import Gargantext.System.Logging
import Gargantext.Utils.Jobs qualified as Jobs
import Gargantext.Utils.Jobs.Monad qualified as Jobs
import Gargantext.Utils.Jobs.Queue qualified as Jobs
import Gargantext.Utils.Jobs.Settings qualified as Jobs
import Network.HTTP.Client.TLS (newTlsManager)
import Network.Wai (Application)
import Network.Wai.Handler.Warp qualified as Wai
import Network.Wai.Handler.Warp qualified as Warp
import Prelude
import Servant.Auth.Client ()
import Servant.Client
import Servant.Job.Async qualified as ServantAsync
import Test.Database.Setup (withTestDB, fakeIniPath, testEnvToPgConnectionInfo)
import Test.Database.Types
import qualified Gargantext.Prelude.Mail as Mail
import qualified Gargantext.Prelude.NLP as NLP
import qualified Gargantext.Utils.Jobs as Jobs
import qualified Gargantext.Utils.Jobs.Monad as Jobs
import qualified Gargantext.Utils.Jobs.Queue as Jobs
import qualified Gargantext.Utils.Jobs.Settings as Jobs
import qualified Network.Wai.Handler.Warp as Warp
import qualified Network.Wai.Handler.Warp as Wai
import qualified Servant.Job.Async as ServantAsync
newTestEnv :: TestEnv -> Logger (GargM Env BackendInternalError) -> Warp.Port -> IO Env
......
......@@ -13,36 +13,35 @@ Portability : POSIX
module Test.Database.Operations.DocumentSearch where
import Prelude
import Control.Lens (view)
import Control.Monad.Reader
import Data.Aeson.QQ.Simple
import Data.Aeson.Types
-- import Gargantext.API.Node.Update (updateDocs)
import Data.Text qualified as T
import Gargantext.Core
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Text.Terms.Mono.Stem.En
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Node (NodePoly(..))
-- import Network.URI (parseURI)
import Gargantext.Prelude
import Test.Database.Types
import Test.Hspec.Expectations
import Test.Tasty.HUnit
import Gargantext.Core.Text.Terms.Mono.Stem.En
import Gargantext.Database.Admin.Config (userMaster)
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.Query as API
import Gargantext.Database.Query.Facet
exampleDocument_01 :: HyperdataDocument
exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_01 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":"01"
, "publication_day":6
, "language_iso2":"EN"
......@@ -63,7 +62,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
|]
exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":""
, "uniqId": "1405.3072v3"
, "bdd": "Arxiv"
......@@ -83,7 +82,7 @@ exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
|]
exampleDocument_03 :: HyperdataDocument
exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_03 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{
"bdd": "Arxiv"
, "doi": ""
......@@ -101,7 +100,7 @@ exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
|]
exampleDocument_04 :: HyperdataDocument
exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_04 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{
"bdd": "Arxiv"
, "doi": ""
......@@ -141,7 +140,7 @@ stemmingTest _env = do
stemIt "PyPlasm:" `shouldBe` "PyPlasm:"
mkQ :: T.Text -> API.Query
mkQ txt = either (\e -> error $ "(query) = " <> T.unpack txt <> ": " <> e) id . API.parseQuery . API.RawQuery $ txt
mkQ txt = either (\e -> errorTrace $ "(query) = " <> T.unpack txt <> ": " <> e) identity . API.parseQuery . API.RawQuery $ txt
corpusSearch01 :: TestEnv -> Assertion
corpusSearch01 env = do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment