{-|
Module      : Gargantext.Database.Flow
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-- TODO-ACCESS:
--   check userId       CanFillUserCorpus   userCorpusId
--   check masterUserId CanFillMasterCorpus masterCorpusId

-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-}

{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds         #-}
{-# LANGUAGE InstanceSigs            #-}
{-# LANGUAGE ScopedTypeVariables     #-}
{-# LANGUAGE TemplateHaskell         #-}
{-# LANGUAGE TypeApplications  #-}
{-# LANGUAGE TypeOperators           #-}

module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
  ( DataText(..)
  , getDataText
  , getDataText_Debug
  , flowDataText
  , flow

  , flowCorpusFile
  , flowCorpus
  , flowCorpusUser
  , flowAnnuaire
  , insertMasterDocs
  , buildSocialList
  , saveDocNgramsWith
  , addDocumentsToHyperCorpus

  , reIndexWith

  , getOrMkRoot
  , getOrMkRootWithCorpus
  , TermType(..)
  , DataOrigin(..)
  , allDataOrigins

  , do_api
  )
    where

import Conduit
import Control.Lens ( to, view, over )
import Data.Bifunctor qualified as B
import Data.Conduit qualified as C
import Data.Conduit.Internal (zipSources)
import Data.Conduit.List qualified as CL
import Data.Conduit.List qualified as CList
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.Core (Lang(..), NLPServerConfig, withDefaultLanguage)
import Gargantext.Core.Notifications.CentralExchange.Types (HasCentralExchangeNotification(ce_notify), CEMessage(..))
import Gargantext.Core.Config (GargConfig(..), hasConfig)
import Gargantext.Core.Config.Types (APIsConfig(..))
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms), Ngrams(_ngramsTerms))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow.Extract ()  -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus )
import Gargantext.Database.Action.Flow.Utils (docNgrams, documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument) )
import Gargantext.Database.Admin.Types.Node hiding (DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude (IsDBCmd, DBCmdWithEnv)
import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 )
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode) ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, text2ngrams )
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
import PUBMED.Types qualified as PUBMED

------------------------------------------------------------------------
-- Imports for upgrade function
import Gargantext.Database.Query.Tree.Error ( HasTreeError )

------------------------------------------------------------------------

allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs

---------------

-- Show instance is not possible because of IO
printDataText :: DataText -> IO ()
printDataText (DataOld xs) = putText $ show xs
printDataText (DataNew (maybeInt, conduitData)) = do
  res <- C.runConduit (conduitData .| CL.consume)
  putText $ show (maybeInt, res)

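-- 'getDataText' either queries a remote API ('ExternalOrigin'), returning a
-- 'DataNew' stream of documents, or searches the local master corpus
-- ('InternalOrigin'), returning the matching 'DataOld' node ids.
--
-- Hypothetical usage sketch: the bindings @origin@, @mPubmedKey@, @mEpoKey@
-- and @mLimit@ are made up, 'API.RawQuery' is assumed to have a plain text
-- constructor, and 'Multi' is assumed to be the multi-word 'TermType'
-- constructor:
--
-- @
-- eText <- getDataText origin (Multi EN) (API.RawQuery "graph theory")
--                      mPubmedKey mEpoKey mLimit
-- @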
-- TODO use the split parameter in config file
getDataText :: (HasNodeError err)
            => DataOrigin
            -> TermType Lang
            -> API.RawQuery
            -> Maybe PUBMED.APIKey
            -> Maybe EPO.AuthKey
            -> Maybe API.Limit
            -> DBCmdWithEnv env err (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q mPubmedAPIKey mAuthKey li = do
  cfg <- view hasConfig
  eRes <- liftBase $ API.get api (_tt_lang la) q mPubmedAPIKey mAuthKey (_ac_epo_api_url $ _gc_apis cfg) li
  pure $ DataNew <$> eRes
getDataText (InternalOrigin _) la q _ _ _li = do
  (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
  ids <-  map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q)
  pure $ Right $ DataOld ids

getDataText_Debug :: (HasNodeError err)
                  => DataOrigin
                  -> TermType Lang
                  -> API.RawQuery
                  -> Maybe API.Limit
                  -> DBCmdWithEnv env err ()
getDataText_Debug a l q li = do
  result <- getDataText a l q Nothing Nothing li
  case result of
    Left  err -> liftBase $ putText $ show err
    Right res -> liftBase $ printDataText res


-------------------------------------------------------------------------------
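-- | Attach a 'DataText' to an existing user corpus: for 'DataOld' the
-- already-indexed documents are simply linked to the corpus before the term
-- lists are (re)built, while for 'DataNew' the streamed documents go through
-- the full 'flowCorpus' pipeline.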
flowDataText :: forall env err m.
                ( IsDBCmd env err m
                , HasNodeStory env err m
                , MonadLogger m
                , HasNLPServer env
                , HasTreeError err
                , HasValidationError err
                , MonadJobStatus m
                , HasCentralExchangeNotification env
                )
                => User
                -> DataText
                -> TermType Lang
                -> CorpusId
                -> Maybe FlowSocialListWith
                -> JobHandle m
                -> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw _ = do
  $(logLocM) DEBUG $ T.pack $ "Found " <> show (length ids) <> " old node IDs"
  (_userId, userCorpusId, listId) <- createNodes (MkCorpusUserNormalCorpusIds u [cid]) corpusType
  _ <- Doc.add userCorpusId (map nodeId2ContextId ids)
  flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw
  where
    corpusType = Nothing :: Maybe HyperdataCorpus
flowDataText u (DataNew (mLen, txtC)) tt cid mfslw jobHandle = do
  $(logLocM) DEBUG $ T.pack $ "Found " <> show mLen <> " new documents to process"
  for_ (mLen <&> fromInteger) (`addMoreSteps` jobHandle)
  flowCorpus (MkCorpusUserNormalCorpusIds u [cid]) tt mfslw (fromMaybe 0 mLen, transPipe liftBase txtC) jobHandle

------------------------------------------------------------------------
-- TODO use proxy
flowAnnuaire :: ( IsDBCmd env err m
                , HasNodeStory env err m
                , MonadLogger m
                , HasNLPServer env
                , HasTreeError err
                , HasValidationError err
                , MonadJobStatus m
                , HasCentralExchangeNotification env )
             => MkCorpusUser
             -> TermType Lang
             -> FilePath
             -> JobHandle m
             -> m AnnuaireId
flowAnnuaire mkCorpusUser l filePath jobHandle = do
  -- TODO Conduit for file
  docs <- liftBase (readFile_Annuaire filePath :: IO [HyperdataContact])
  flow (Nothing :: Maybe HyperdataAnnuaire) mkCorpusUser l Nothing (fromIntegral $ length docs, yieldMany docs) jobHandle

------------------------------------------------------------------------
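-- | Parse a file on disk (with the given 'FileType' / 'FileFormat') and flow
-- the parsed documents into a corpus. Note that the 'Limit' argument is
-- currently ignored (see the commented-out chunking code below).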
flowCorpusFile :: ( IsDBCmd env err m
                  , HasNodeStory env err m
                  , MonadLogger m
                  , HasNLPServer env
                  , HasTreeError err
                  , HasValidationError err
                  , MonadJobStatus m
                  , HasCentralExchangeNotification env )
           => MkCorpusUser
           -> Limit -- Limit the number of docs (for dev purpose)
           -> TermType Lang
           -> FileType
           -> FileFormat
           -> FilePath
           -> Maybe FlowSocialListWith
           -> JobHandle m
           -> m CorpusId
flowCorpusFile mkCorpusUser _l la ft ff fp mfslw jobHandle = do
  eParsed <- liftBase $ parseFile ft ff fp
  case eParsed of
    Right parsed -> do
      flowCorpus mkCorpusUser la mfslw (fromIntegral $ length parsed, yieldMany parsed .| mapC toHyperdataDocument) jobHandle
      --let docs = splitEvery 500 $ take l parsed
      --flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
    Left e       -> panicTrace $ "Error: " <> e

------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-- (For now, Either is enough)
flowCorpus :: ( IsDBCmd env err m
              , HasNodeStory env err m
              , MonadLogger m
              , HasNLPServer env
              , HasTreeError err
              , HasValidationError err
              , FlowCorpus a
              , MonadJobStatus m
              , HasCentralExchangeNotification env )
           => MkCorpusUser
           -> TermType Lang
           -> Maybe FlowSocialListWith
           -> (Integer, ConduitT () a m ())
           -> JobHandle m
           -> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)


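-- | Generic entry point of the flow: create the corpus nodes for the given
-- user, stream the documents into the corpus in small batches (reporting job
-- progress after each batch), and finally call 'flowCorpusUser' to build the
-- term lists.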
flow :: forall env err m a c.
        ( IsDBCmd env err m
        , HasNodeStory env err m
        , MonadLogger m
        , HasNLPServer env
        , HasTreeError err
        , HasValidationError err
        , FlowCorpus a
        , MkCorpus c
        , MonadJobStatus m
        , HasCentralExchangeNotification env
        )
        => Maybe c
        -> MkCorpusUser
        -> TermType Lang
        -> Maybe FlowSocialListWith
        -> (Integer, ConduitT () a m ())
        -> JobHandle m
        -> m CorpusId
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
  (_userId, userCorpusId, listId) <- createNodes mkCorpusUser c
  -- TODO if public insertMasterDocs else insertUserDocs
  nlpServer <- view $ nlpServerGet (_tt_lang la)
  runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
    .| CList.chunksOf 5
    .| mapM_C (addDocumentsWithProgress nlpServer userCorpusId)
    .| sinkNull

  let u = userFromMkCorpusUser mkCorpusUser

  $(logLocM) DEBUG "Calling flowCorpusUser"
  flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw

  where
    addDocumentsWithProgress :: NLPServerConfig -> CorpusId -> [(Int, a)] -> m ()
    addDocumentsWithProgress nlpServer userCorpusId docsChunk = do
      $(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, count)
      docs <- addDocumentsToHyperCorpus nlpServer c la userCorpusId (map snd docsChunk)
      markProgress (length docs) jobHandle


-- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds
-- the given documents to the corpus. Returns the Ids of the inserted documents.
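--
-- Illustrative call only; the bindings @nlpConfig@, @corpusId@ and @docs@
-- below are hypothetical:
--
-- @
-- docIds <- addDocumentsToHyperCorpus nlpConfig (Nothing :: Maybe HyperdataCorpus) (Multi EN) corpusId docs
-- @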
addDocumentsToHyperCorpus :: ( IsDBCmd env err m
                             , HasNodeError err
                             , FlowCorpus document
                             , MkCorpus corpus
                             )
                             => NLPServerConfig
                             -> Maybe corpus
                             -> TermType Lang
                             -> CorpusId
                             -> [document]
                             -> m [DocId]
addDocumentsToHyperCorpus ncs mb_hyper la corpusId docs = do
  ids <- insertMasterDocs ncs mb_hyper la docs
  void $ Doc.add corpusId (map nodeId2ContextId ids)
  pure ids

------------------------------------------------------------------------
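-- | Create the node skeleton of a user corpus: the corpus itself (under the
-- user's root), its Texts node, its term list and its Graph node, sending
-- 'UpdateTreeFirstLevel' notifications for both the new list and the corpus.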
createNodes :: ( IsDBCmd env err m, HasNodeError err
               , MkCorpus c
               , HasCentralExchangeNotification env
               )
            => MkCorpusUser
            -> Maybe c
            -> m (UserId, CorpusId, ListId)
createNodes mkCorpusUser ctype = do
  -- User Flow
  (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus mkCorpusUser ctype
  -- NodeTexts is first
  _tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
  -- printDebug "NodeTexts: " tId

  -- NodeList is second
  listId <- getOrMkList userCorpusId userId

  -- User Graph Flow
  _ <- insertDefaultNodeIfNotExists NodeGraph     userCorpusId userId
  -- _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId

  ce_notify $ UpdateTreeFirstLevel listId
  ce_notify $ UpdateTreeFirstLevel userCorpusId

  pure (userId, userCorpusId, listId)


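-- | Finalise a user corpus: build its (social) term lists, re-index the
-- documents with the resulting 'MapTerm's, and refresh context scores and
-- ngrams occurrence counts.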
flowCorpusUser :: ( HasNodeError err
                  , HasValidationError err
                  , HasNLPServer env
                  , HasTreeError err
                  , HasNodeStory env err m
                  , MkCorpus c
                  )
               => Lang
               -> User
               -> CorpusId
               -> ListId
               -> Maybe c
               -> Maybe FlowSocialListWith
               -> m CorpusId
flowCorpusUser l user userCorpusId listId ctype mfslw = do
  buildSocialList l user userCorpusId listId ctype mfslw

  -- _ <- insertOccsUpdates userCorpusId mastListId
  --_ <- mkPhylo  userCorpusId userId
  -- Annuaire Flow
  -- _ <- mkAnnuaire  rootUserId userId
  _ <- reIndexWith userCorpusId listId NgramsTerms (Set.singleton MapTerm)
  _ <- updateContextScore      userCorpusId listId
  _ <- updateNgramsOccurrences userCorpusId listId

  pure userCorpusId


-- | This function is responsible for constructing the term lists (it is a
-- no-op when the social list is explicitly disabled with 'NoList').
buildSocialList :: ( HasNodeError err
                   , HasValidationError err
                   , HasNLPServer env
                   , HasTreeError err
                   , HasNodeStory env err m
                   , MkCorpus c
                   )
                => Lang
                -> User
                -> CorpusId
                -> ListId
                -> Maybe c
                -> Maybe FlowSocialListWith
                -> m ()
buildSocialList _l _user _userCorpusId _listId _ctype (Just (NoList _)) = pure ()
buildSocialList l user userCorpusId listId ctype mfslw = do
  -- User List Flow
  (masterUserId, _masterRootId, masterCorpusId)
    <- getOrMkRootWithCorpus MkCorpusUserMaster ctype

  nlpServer <- view (nlpServerGet l)
  --let gp = (GroupParams l 2 3 (StopSize 3))
  -- Here the PosTagAlgo should be chosen according to the Lang
  -- let gp = GroupParams { unGroupParams_lang = l
  --                      , unGroupParams_len = 10
  --                      , unGroupParams_limit = 10
  --                      , unGroupParams_stopSize = StopSize 10 }
  let gp = GroupWithPosTag l nlpServer HashMap.empty
  ngs  <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp

  -- printDebug "flowCorpusUser:ngs" ngs

  _userListId <- flowList_DbRepo listId ngs
  _mastListId <- getOrMkList masterCorpusId masterUserId
  pure ()

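-- | Insert documents into the master corpus, extract their ngrams with the
-- given NLP server and persist those ngrams on the master list. Returns the
-- 'NodeId's of the inserted documents.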
insertMasterDocs :: ( IsDBCmd env err m
                    , HasNodeError err
                    , FlowCorpus a
                    , MkCorpus   c
                    )
                 => NLPServerConfig
                 -> Maybe c
                 -> TermType Lang
                 -> [a]
                 -> m [DocId]
insertMasterDocs ncs c lang hs  =  do
  (masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus MkCorpusUserMaster c
  (ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs )
  _ <- Doc.add masterCorpusId ids'
  -- TODO
  -- create a corpus with database name (CSV or PubMed)
  -- add documents to the corpus (create node_node link)
  -- this will enable global database monitoring

  -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
  mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
                <- mapNodeIdNgrams
                <$> documentIdWithNgrams
                      (extractNgramsT ncs $ withLang lang documentsWithId)
                      (map (B.first contextId2NodeId) documentsWithId)

  lId <- getOrMkList masterCorpusId masterUserId
  -- _ <- saveDocNgramsWith lId mapNgramsDocs'
  _ <- saveDocNgramsWith lId mapNgramsDocs'

  -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
  pure $ map contextId2NodeId ids'

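-- | Persist extracted ngrams for a list: insert the ngrams themselves, the
-- list's node_ngrams rows, and the per-context associations together with
-- their weights.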
saveDocNgramsWith :: (IsDBCmd env err m)
                  => ListId
                  -> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
                  -> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
  --printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
  let mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocsNoCount

  let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'

  -- new
  mapCgramsId <- listInsertDb lId toNodeNgramsW'
               $ map (bimap _ngramsTerms Map.keys)
               $ HashMap.toList mapNgramsDocs

  --printDebug "saveDocNgramsWith" mapCgramsId
  -- insertDocNgrams
  let ngrams2insert =  catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId)
                                            <$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
                                            <*> Just (fromIntegral w :: Double)
                       | (terms'', mapNgramsTypes)      <- HashMap.toList mapNgramsDocs
                       , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
                       , (nId, (w, _cnt))               <- Map.toList mapNodeIdWeight
                       ]
  -- printDebug "Ngrams2Insert" ngrams2insert
  _return <- insertContextNodeNgrams2 ngrams2insert

  -- to be removed
  _   <- insertDocNgrams lId $ HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs

  pure ()


------------------------------------------------------------------------



-- | Re-index documents of a corpus with ngrams in the list
reIndexWith :: ( HasNodeStory env err m )
            => CorpusId
            -> ListId
            -> NgramsType
            -> Set ListType
            -> m ()
reIndexWith cId lId nt lts = do
  -- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
  corpus_node <- getNodeWith cId (Proxy @HyperdataCorpus)
  let corpusLang = withDefaultLanguage $ view (node_hyperdata . to _hc_lang) corpus_node

  -- Getting [NgramsTerm]
  ts <- List.concat
     <$> map (\(k,vs) -> k:vs)
     <$> HashMap.toList
     <$> getTermsWith identity [lId] nt lts

  -- Get all documents of the corpus
  docs <- selectDocNodes cId

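  -- For each document, index the list terms it contains: each term is wrapped
  -- as 'SimpleNgrams' and annotated with its 'TermsCount'; duplicate terms
  -- within a document are merged by summing the counts (the weight collapses
  -- to 1).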
  let
    -- fromListWith (<>)
    ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b')))))
                $ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
                $ map (docNgrams corpusLang nt ts) docs

  -- Saving the indexation in database
  mapM_ (saveDocNgramsWith lId) ngramsByDoc
  pure ()