{-|
Module      : Gargantext.Database.Action.Flow
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-- TODO-ACCESS:
--   check userId       CanFillUserCorpus   userCorpusId
--   check masterUserId CanFillMasterCorpus masterCorpusId

-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-}

{-# LANGUAGE BangPatterns            #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds         #-}
{-# LANGUAGE DerivingStrategies      #-}
{-# LANGUAGE InstanceSigs            #-}
{-# LANGUAGE LambdaCase              #-}
{-# LANGUAGE ScopedTypeVariables     #-}
{-# LANGUAGE TemplateHaskell         #-}
{-# LANGUAGE TupleSections           #-}
{-# LANGUAGE TypeApplications        #-}
{-# LANGUAGE TypeOperators           #-}

module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
  ( DataText(..)
  , getDataText
  , getDataText_Debug
  , flowDataText
  , flow
  , flowCorpusFile
  , flowCorpus
  , flowCorpusUser
  , flowAnnuaire
  , buildSocialList
  , addDocumentsToHyperCorpus
  , reIndexWith
  , ngramsByDoc
  , getOrMkRoot
  , getOrMkRootWithCorpus
  , TermType(..)
  , DataOrigin(..)
  , allDataOrigins
  , do_api
  )
  where

import Conduit
import Control.Lens ( to, view )
import Control.Exception.Safe (catch, MonadCatch)
import Data.Conduit qualified as C
import Data.Conduit.Internal (zipSources)
import Data.Conduit.List qualified as CL
import Data.Conduit.List qualified as CList
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core (Lang(..), withDefaultLanguage, NLPServerConfig)
import Gargantext.Core.Config (GargConfig(..), hasConfig)
import Gargantext.Core.Config.Types (APIsConfig(..))
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory.Types (HasNodeStory, NodeStoryEnv, HasNodeStoryEnv(..))
import Gargantext.Core.Notifications.CentralExchange.Types (HasCentralExchangeNotification(ce_notify), CEMessage(..))
import Gargantext.Core.Text (HasText)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms), Ngrams(_ngramsTerms))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus, DocumentIdWithNgrams(..) )
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, insertDocs, mkNodeIdNgramsMap, ngramsByDoc, documentIdWithNgrams)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument), HyperdataDocument )
import Gargantext.Database.Admin.Types.Node hiding (ERROR, DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 )
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode), UniqParameters(..), newUniqIdHash ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb, getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId )
import Gargantext.Database.Schema.Node
import Gargantext.Database.Types
import Gargantext.Prelude hiding (catch, onException, to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG, ERROR), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
------------------------------------------------------------------------
-- Imports for upgrade function
import Gargantext.Database.Query.Tree.Error ( HasTreeError )
import Gargantext.Core.Text.Terms.Multi (MultitermsExtractionException)

------------------------------------------------------------------------
allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs

---------------
-- Show instance is not possible because of IO
printDataText :: DataText -> IO ()
printDataText (DataOld xs) = putText $ show xs
printDataText (DataNew (maybeInt, conduitData)) = do
  res <- C.runConduit (conduitData .| CL.consume)
  putText $ show (maybeInt, res)

-- TODO use the split parameter in config file
getDataText :: (HasNodeError err)
            => DataOrigin
            -> TermType Lang
            -> API.RawQuery
            -> Maybe API.Limit
            -> DBCmdWithEnv env err (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q li = do
  cfg <- view hasConfig
  eRes <- liftBase $ API.get api (_tt_lang la) q (_ac_epo_api_url $ _gc_apis cfg) li
  pure $ DataNew <$> eRes
getDataText (InternalOrigin _) la q _li = do
  cfg <- view hasConfig
  runDBTx $ do
    (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
    ids <- map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q)
    pure $ Right $ DataOld ids

getDataText_Debug :: (HasNodeError err)
                  => DataOrigin
                  -> TermType Lang
                  -> API.RawQuery
                  -> Maybe API.Limit
                  -> DBCmdWithEnv env err ()
getDataText_Debug a l q li = do
  result <- getDataText a l q li
  case result of
    Left err  -> liftBase $ putText $ show err
    Right res -> liftBase $ printDataText res
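-- Hypothetical usage sketch for 'getDataText' (the language, query string and
-- limit are made-up values; the origin is taken from 'allDataOrigins' rather
-- than spelled out, since the set of external backends lives in "API"):
--
-- > case allDataOrigins of
-- >   (origin:_) -> getDataText origin (Multi EN) (API.RawQuery "haskell") (Just 100)
-- >   []         -> pure (Right (DataOld []))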
-------------------------------------------------------------------------------
flowDataText :: forall env err m.
                ( IsDBCmd env err m
                , HasNodeStory env err m
                , MonadLogger m
                , HasNLPServer env
                , HasTreeError err
                , HasValidationError err
                , MonadJobStatus m
                , MonadCatch m
                , HasCentralExchangeNotification env )
             => User
             -> DataText
             -> TermType Lang
             -> CorpusId
             -> Maybe FlowSocialListWith
             -> JobHandle m
             -> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw _ = do
  $(logLocM) DEBUG $ T.pack $ "Found " <> show (length ids) <> " old node IDs"
  cfg <- view hasConfig
  (_userId, userCorpusId, listId, msgs) <- runDBTx $ do
    (a, user_corpus_id, c, d) <- createNodes cfg (MkCorpusUserNormalCorpusIds u [cid]) corpusType
    _ <- Doc.add user_corpus_id (map nodeId2ContextId ids)
    pure (a, user_corpus_id, c, d)
  forM_ msgs ce_notify
  flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw
  where
    corpusType = Nothing :: Maybe HyperdataCorpus
flowDataText u (DataNew (mLen, txtC)) tt cid mfslw jobHandle = do
  $(logLocM) DEBUG $ T.pack $ "Found " <> show mLen <> " new documents to process"
  for_ (mLen <&> fromInteger) (`addMoreSteps` jobHandle)
  flowCorpus (MkCorpusUserNormalCorpusIds u [cid]) tt mfslw (fromMaybe 0 mLen, transPipe liftBase txtC) jobHandle

------------------------------------------------------------------------
-- TODO use proxy
flowAnnuaire :: ( IsDBCmd env err m
                , HasNodeStory env err m
                , MonadLogger m
                , HasNLPServer env
                , HasTreeError err
                , HasValidationError err
                , MonadJobStatus m
                , MonadCatch m
                , HasCentralExchangeNotification env )
             => MkCorpusUser
             -> TermType Lang
             -> FilePath
             -> JobHandle m
             -> m AnnuaireId
flowAnnuaire mkCorpusUser l filePath jobHandle = do
  -- TODO Conduit for file
  docs <- liftBase (readFile_Annuaire filePath :: IO [HyperdataContact])
  flow (Nothing :: Maybe HyperdataAnnuaire) mkCorpusUser l Nothing (fromIntegral $ length docs, yieldMany docs) jobHandle

------------------------------------------------------------------------
flowCorpusFile :: ( IsDBCmd env err m
                  , HasNodeStory env err m
                  , MonadLogger m
                  , HasNLPServer env
                  , HasTreeError err
                  , HasValidationError err
                  , MonadJobStatus m
                  , MonadCatch m
                  , HasCentralExchangeNotification env )
               => MkCorpusUser
               -> TermType Lang
               -> FileType
               -> FileFormat
               -> FilePath
               -> Maybe FlowSocialListWith
               -> JobHandle m
               -> m CorpusId
flowCorpusFile mkCorpusUser la ft ff fp mfslw jobHandle = do
  eParsed <- liftBase $ parseFile ft ff fp
  case eParsed of
    Right parsed -> do
      flowCorpus mkCorpusUser la mfslw (fromIntegral $ length parsed, yieldMany parsed .| mapC toHyperdataDocument) jobHandle
      --let docs = splitEvery 500 $ take l parsed
      --flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
    Left e -> panicTrace $ "Error: " <> e

------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-- (For now, Either is enough)
flowCorpus :: ( IsDBCmd env err m
              , HasNodeStory env err m
              , MonadLogger m
              , HasNLPServer env
              , HasTreeError err
              , HasValidationError err
              , FlowCorpus a
              , ExtractNgrams m a
              , MonadJobStatus m
              , MonadCatch m
              , HasCentralExchangeNotification env )
           => MkCorpusUser
           -> TermType Lang
           -> Maybe FlowSocialListWith
           -> (Integer, ConduitT () a m ())
           -> JobHandle m
           -> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
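-- Hypothetical invocation of 'flowCorpusFile' (the user, corpus id, file
-- arguments and job handle are made-up, and 'CsvHal' / 'Plain' are
-- illustrative parser settings, not a recommendation):
--
-- > flowCorpusFile (MkCorpusUserNormalCorpusIds user [corpusId]) (Multi EN)
-- >                CsvHal Plain "docs.csv" Nothing jobHandle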
flow :: forall env err m a c.
        ( IsDBCmd env err m
        , HasNodeStory env err m
        , MonadLogger m
        , HasNLPServer env
        , HasTreeError err
        , HasValidationError err
        , FlowCorpus a
        , ExtractNgrams m a
        , MkCorpus c
        , MonadJobStatus m
        , HasCentralExchangeNotification env
        , MonadCatch m )
     => Maybe c
     -> MkCorpusUser
     -> TermType Lang
     -> Maybe FlowSocialListWith
     -> (Integer, ConduitT () a m ())
     -> JobHandle m
     -> m CorpusId
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
  cfg <- view hasConfig
  (_userId, userCorpusId, listId, msgs) <- runDBTx $ createNodes cfg mkCorpusUser c
  forM_ msgs ce_notify
  -- TODO if public insertMasterDocs else insertUserDocs
  runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
            .| CList.chunksOf 5
            .| mapM_C (addDocumentsWithProgress userCorpusId)
            .| sinkNull
  let u = userFromMkCorpusUser mkCorpusUser
  $(logLocM) DEBUG "Calling flowCorpusUser"
  flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
  where
    addDocumentsWithProgress :: CorpusId -> [(Int, a)] -> m ()
    addDocumentsWithProgress userCorpusId docsChunk = do
      $(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, count)
      docs <- addDocumentsToHyperCorpus c la userCorpusId (map snd docsChunk)
      markProgress (length docs) jobHandle
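-- The pipeline above is the usual conduit idiom for batched ingestion with
-- progress reporting: tag each document with an increasing index, split the
-- stream into chunks of five, and run a monadic action per chunk. The same
-- skeleton in isolation ('docs' and 'process' are hypothetical):
--
-- > runConduit $ zipSources (yieldMany ([1..] :: [Int])) (yieldMany docs)
-- >           .| CList.chunksOf 5
-- >           .| mapM_C process
-- >           .| sinkNull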
-- | Given a list of corpus documents and a 'NodeId' identifying the 'CorpusId', adds
-- the given documents to the corpus. Returns the Ids of the inserted documents.
addDocumentsToHyperCorpus :: ( IsDBCmd env err m
                             , HasNodeError err
                             , HasNLPServer env
                             , FlowCorpus document
                             , ExtractNgrams m document
                             , MkCorpus corpus
                             , MonadLogger m
                             , MonadCatch m )
                          => Maybe corpus
                          -> TermType Lang
                          -> CorpusId
                          -> [document]
                          -> m [DocId]
addDocumentsToHyperCorpus mb_hyper la corpusId docs = do
  cfg <- view hasConfig
  nlp <- view (nlpServerGet $ _tt_lang la)
  -- First extract all the ngrams for the input documents via the NLP server,
  -- collect errors (if any) and pass to 'insertMasterDocs' only the documents
  -- for which the ngrams extraction succeeded. At the moment errors are just
  -- logged, but in the future they could be returned upstream so that we can
  -- display a final result of how many were skipped, how many succeeded, etc.
  uncommittedNgrams <- extractNgramsFromDocuments nlp la docs
  ids <- runDBTx $ insertMasterDocs cfg uncommittedNgrams mb_hyper docs
  runDBTx $ do
    void $ Doc.add corpusId (map nodeId2ContextId ids)
    pure ids

------------------------------------------------------------------------
createNodes :: ( HasNodeError err
               , MkCorpus c )
            => GargConfig
            -> MkCorpusUser
            -> Maybe c
            -> DBUpdate err (UserId, CorpusId, ListId, [CEMessage])
createNodes cfg mkCorpusUser ctype = do
  -- User Flow
  (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus cfg mkCorpusUser ctype
  -- NodeTexts is first
  _tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
  -- printDebug "NodeTexts: " tId

  -- NodeList is second
  listId <- getOrMkList userCorpusId userId

  -- User Graph Flow
  _ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
  -- _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId

  let msgs = [UpdateTreeFirstLevel listId, UpdateTreeFirstLevel userCorpusId]
  pure (userId, userCorpusId, listId, msgs)

flowCorpusUser :: ( HasNodeError err
                  , HasValidationError err
                  , HasNLPServer env
                  , HasTreeError err
                  , HasNodeStory env err m
                  , MkCorpus c )
               => Lang
               -> User
               -> CorpusId
               -> ListId
               -> Maybe c
               -> Maybe FlowSocialListWith
               -> m CorpusId
flowCorpusUser l user userCorpusId listId ctype mfslw = do
  env <- view hasNodeStory
  buildSocialList l user userCorpusId listId ctype mfslw
  runDBTx $ do
    -- _ <- insertOccsUpdates userCorpusId mastListId
    -- _ <- mkPhylo userCorpusId userId
    -- Annuaire Flow
    -- _ <- mkAnnuaire rootUserId userId
    _ <- reIndexWith env userCorpusId listId NgramsTerms (Set.singleton MapTerm)
    _ <- updateContextScore env userCorpusId listId
    _ <- updateNgramsOccurrences env userCorpusId listId
    pure userCorpusId

-- | This function is responsible for constructing the social lists of terms.
buildSocialList :: ( HasNodeError err
                   , HasValidationError err
                   , HasTreeError err
                   , HasNodeStory env err m
                   , MkCorpus c
                   , HasNLPServer env )
                => Lang
                -> User
                -> CorpusId
                -> ListId
                -> Maybe c
                -> Maybe FlowSocialListWith
                -> m ()
buildSocialList l user userCorpusId listId ctype = \case
  Just (NoList _) -> pure ()
  mfslw -> do
    cfg <- view hasConfig
    nlpServer <- view (nlpServerGet l)
    (masterUserId, masterCorpusId, ngs) <- runDBTx $ do
      -- User List Flow
      (master_user_id, _masterRootId, master_corpus_id) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster ctype
      let gp = GroupWithPosTag l nlpServer HashMap.empty
      (master_user_id, master_corpus_id,) <$> buildNgramsLists user userCorpusId master_corpus_id mfslw gp

    -- printDebug "flowCorpusUser:ngs" ngs
    _userListId <- flowList_DbRepo listId ngs
    _mastListId <- runDBTx $ getOrMkList masterCorpusId masterUserId
    pure ()

-------------------------------------------------------------------------------
--
-- Splitting Ngrams extraction from document creation
--
-------------------------------------------------------------------------------
--
-- There is a bit of tension between extracting the Ngrams and creating the documents:
-- We need to produce a map between a given 'NodeId' and the ngrams associated with it, where
-- the latter are extracted via the NLP server. However, each ngram has to be matched to the
-- 'NodeId' associated with the new resource being created as part of 'insertMasterDocs'. This
-- creates a bit of a chicken-and-egg problem in trying to make 'insertMasterDocs' a 'DBUpdate'
-- function: we need a 'NodeId' to exist by the time we call 'extractNgrams', but the latter
-- can't be executed in a pure fashion without a 'NodeId'.
--
-- To fix this, we need a data structure which would index the ngrams by some other notion of
-- index, and later have a transformation function which would re-index these ngrams to the
-- actual 'NodeId' created during the DB transaction.
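-- A minimal, self-contained sketch of that re-indexing idea (hypothetical
-- types and values, not part of this module): ngrams are first keyed by a
-- content hash, and only re-keyed once the transaction has allocated real ids.
--
-- > type Hash = Text
-- >
-- > -- Phase 1 (pure, outside the transaction): key by content hash.
-- > uncommitted :: Map.Map Hash [Text]
-- > uncommitted = Map.fromList [("deadbeef", ["flow", "corpus"])]
-- >
-- > -- Phase 2 (inside the transaction): re-key by the allocated NodeId.
-- > commitWith :: Map.Map Hash NodeId -> Map.Map Hash [Text] -> Map.Map NodeId [Text]
-- > commitWith allocated = Map.foldrWithKey step Map.empty
-- >   where
-- >     step h ts acc = maybe acc (\nid -> Map.insert nid ts acc) (Map.lookup h allocated)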
-- | Ngrams that have been fully \"committed\", i.e. associated to the respective document
-- where the latter has been persisted (i.e. committed) on secondary storage.
type CommittedNgrams =
  HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))

newtype DocumentHashId = DocumentHashId { _DocumentHashId :: T.Text }
  deriving stock (Show, Eq)
  deriving newtype Ord

-- | Ngrams that have been extracted from the input 'doc' but not fully associated with
-- a persisted entity on the database.
newtype UncommittedNgrams doc = UncommittedNgrams
  { _UncommittedNgrams :: Map.Map DocumentHashId (DocumentIdWithNgrams DocumentHashId doc ExtractedNgrams) }
  deriving stock Show
  deriving newtype (Semigroup, Monoid)

data InsertDocError
  = NgramsNotFound !(Maybe DocumentHashId) !DocId
  deriving Show

extractNgramsFromDocument :: ( UniqParameters doc
                             , HasText doc
                             , ExtractNgrams m doc
                             , IsDBCmd err env m
                             , MonadLogger m
                             , MonadCatch m )
                          => NLPServerConfig
                          -> TermType Lang
                          -> doc
                          -> m (UncommittedNgrams doc)
extractNgramsFromDocument nlpServer lang doc =
  -- In case of an exception from the NLP server, treat this as having no ngrams,
  -- but still index it in the final map, so that later reconciliation still works.
  -- Practically speaking it means this won't have any ngrams associated, but the document
  -- will still be added to the corpus and we can try to regenerate the ngrams at a later stage.
  UncommittedNgrams . Map.singleton docId <$>
    (documentIdWithNgrams (extractNgrams nlpServer $ withLang lang [doc]) (Indexed docId doc)
       `catch` \(e :: MultitermsExtractionException) -> do
         $(logLocM) ERROR $ T.pack $ "Document with hash " <> show docId
                                  <> " failed ngrams extraction due to an exception: "
                                  <> displayException e
         pure $ DocumentIdWithNgrams (Indexed docId doc) mempty)
  where
    docId = DocumentHashId $ newUniqIdHash doc

commitNgramsForDocument :: UniqParameters doc
                        => UncommittedNgrams doc
                        -> Indexed ContextId (Node doc)
                        -> Either InsertDocError CommittedNgrams
commitNgramsForDocument (UncommittedNgrams ng) (Indexed oldIx node) = do
  docId <- mb_docId
  case Map.lookup docId ng of
    Nothing  -> Left $ NgramsNotFound (Just docId) (_node_id node)
    Just ngs -> Right $ mkNodeIdNgramsMap [reIndex ngs]
  where
    mb_docId = case DocumentHashId <$> _node_hash_id node of
      Nothing  -> Left $ NgramsNotFound Nothing (_node_id node)
      Just dId -> Right dId

    reIndex :: DocumentIdWithNgrams DocumentHashId doc ExtractedNgrams
            -> DocumentIdWithNgrams NodeId doc ExtractedNgrams
    reIndex did =
      let (Indexed _ a) = documentWithId did
      in did { documentWithId = Indexed (contextId2NodeId oldIx) a }
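-- How a caller consumes 'commitNgramsForDocument' (a sketch; the branch
-- bodies are placeholders): a node without a '_node_hash_id', or whose hash
-- was never seen during extraction, surfaces as 'NgramsNotFound', while a hit
-- is re-keyed to the freshly allocated 'NodeId'.
--
-- > case commitNgramsForDocument uncommitted indexedNode of
-- >   Left (NgramsNotFound mbHash dId) -> {- log and skip -} pure ()
-- >   Right committed                  -> {- feed 'saveDocNgramsWith' -} pure ()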
extractNgramsFromDocuments :: forall doc env err m.
                              ( HasText doc
                              , UniqParameters doc
                              , ExtractNgrams m doc
                              , IsDBCmd env err m
                              , MonadLogger m
                              , MonadCatch m )
                           => NLPServerConfig
                           -> TermType Lang
                           -> [doc]
                           -> m (UncommittedNgrams doc)
extractNgramsFromDocuments nlpServer lang docs =
  foldlM go mempty docs
  where
    go :: UncommittedNgrams doc -> doc -> m (UncommittedNgrams doc)
    go !acc inputDoc = do
      ngrams <- extractNgramsFromDocument nlpServer lang inputDoc
      pure $ acc <> ngrams

commitNgramsForDocuments :: UniqParameters doc
                         => UncommittedNgrams doc
                         -> [Indexed ContextId (Node doc)]
                         -> ([InsertDocError], CommittedNgrams)
commitNgramsForDocuments ng nodes =
  let (errs, successes) = partitionEithers $ map (commitNgramsForDocument ng) nodes
  in (errs, mconcat successes)

insertMasterDocs :: ( HasNodeError err
                    , UniqParameters doc
                    , FlowCorpus doc
                    , MkCorpus c )
                 => GargConfig
                 -> UncommittedNgrams doc
                 -- ^ The ngrams extracted for /all/ the documents
                 -- and indexed by the hash of the given document.
                 -- We can use this map to associate the document
                 -- with the node being created.
                 -> Maybe c
                 -> [doc]
                 -> DBUpdate err [DocId]
insertMasterDocs cfg uncommittedNgrams c hs = do
  (masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster c
  (ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId Nothing) hs)
  _ <- Doc.add masterCorpusId ids'
  -- TODO
  -- create a corpus with database name (CSV or PubMed)
  -- add documents to the corpus (create node_node link)
  -- this will enable global database monitoring

  let (_failedExtraction, ngramsDocsMap) = commitNgramsForDocuments uncommittedNgrams documentsWithId

  lId <- getOrMkList masterCorpusId masterUserId
  _ <- saveDocNgramsWith lId ngramsDocsMap
  pure $ map contextId2NodeId ids'

saveDocNgramsWith :: ListId
                  -> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
                  -> DBUpdate err ()
saveDocNgramsWith lId mapNgramsDocs' = do
  (terms2id :: HashMap.HashMap Text NgramsId) <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'

  let mapNgramsDocs :: HashMap.HashMap Ngrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
      mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'

  -- new
  mapCgramsId <- listInsertDb lId toNodeNgramsW'
               $ map (bimap _ngramsTerms Map.keys)
               $ HashMap.toList mapNgramsDocs

  let ngrams2insert =
        catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId)
                      <$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
                      <*> Just (fromIntegral $ unTermsWeight w :: Double)
                  | (terms'', mapNgramsTypes)      <- HashMap.toList mapNgramsDocs
                  , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
                  , (nId, (w, _cnt))               <- Map.toList mapNodeIdWeight
                  ]
  _return <- insertContextNodeNgrams2 ngrams2insert

  -- to be removed
  _ <- insertDocNgrams lId $ HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs

  pure ()
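-- The list comprehension above flattens the two-level map into one row per
-- (ngram, ngrams type, node) triple. The same shape in miniature (standalone,
-- made-up values):
--
-- > let nested = Map.fromList [("cat", Map.fromList [(1 :: Int, 0.5 :: Double)])]
-- > in [ (t, n, w) | (t, byNode) <- Map.toList nested
-- >                , (n, w)      <- Map.toList byNode ]
-- > -- [("cat", 1, 0.5)]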
------------------------------------------------------------------------
-- | Re-index documents of a corpus with ngrams in the list
reIndexWith :: HasNodeError err
            => NodeStoryEnv err
            -> CorpusId
            -> ListId
            -> NgramsType
            -> Set ListType
            -> DBUpdate err ()
reIndexWith env cId lId nt lts = do
  -- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
  corpus_node <- getNodeWith cId (Proxy @HyperdataCorpus)
  -- NOTE: This assumes a single language for the whole corpus
  let corpusLang = withDefaultLanguage $ view (node_hyperdata . to _hc_lang) corpus_node

  -- Getting [NgramsTerm]
  (ts :: [NgramsTerm]) <- List.concat
                      <$> map (\(k, vs) -> k:vs) -- this concatenates parents with their children, 1st level only
                      <$> HashMap.toList
                      <$> getTermsWith env identity [lId] nt lts

  -- Get all documents of the corpus
  (docs :: [ContextOnlyId HyperdataDocument]) <- selectDocNodesOnlyId cId

  -- Saving the indexation in database
  mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs
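-- Typical call site (see 'flowCorpusUser' above): re-index the corpus against
-- the terms currently marked as 'MapTerm' in its list.
--
-- > _ <- reIndexWith env userCorpusId listId NgramsTerms (Set.singleton MapTerm)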