{-|
Module      : Gargantext.Database.Action.Flow
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-- TODO-ACCESS:
--   check userId       CanFillUserCorpus   userCorpusId
--   check masterUserId CanFillMasterCorpus masterCorpusId

-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-}

{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds         #-}
{-# LANGUAGE InstanceSigs            #-}
{-# LANGUAGE ScopedTypeVariables     #-}
{-# LANGUAGE TemplateHaskell         #-}
{-# LANGUAGE TypeApplications        #-}
{-# LANGUAGE TypeOperators           #-}
{-# LANGUAGE LambdaCase              #-}
{-# LANGUAGE TupleSections           #-}

module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
  ( DataText(..)
  , getDataText
  , getDataText_Debug
  , flowDataText
  , flow

  , flowCorpusFile
  , flowCorpus
  , flowCorpusUser
  , flowAnnuaire
  , buildSocialList
  , addDocumentsToHyperCorpus

  , reIndexWith
  , ngramsByDoc

  , getOrMkRoot
  , getOrMkRootWithCorpus
  , TermType(..)
  , DataOrigin(..)
  , allDataOrigins

  , do_api
  )
    where

import Conduit
import Control.Lens ( to, view )
import Data.Bifunctor qualified as B
import Data.Conduit qualified as C
import Data.Conduit.Internal (zipSources)
import Data.Conduit.List qualified as CL
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core (Lang(..), withDefaultLanguage, NLPServerConfig)
import Gargantext.Core.Notifications.CentralExchange.Types (HasCentralExchangeNotification(ce_notify), CEMessage(..))
import Gargantext.Core.Config (GargConfig(..), hasConfig)
import Gargantext.Core.Config.Types (APIsConfig(..))
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory.Types (HasNodeStory, NodeStoryEnv, HasNodeStoryEnv (..))
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem (GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms), Ngrams(_ngramsTerms))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types (HasValidationError, TermsCount, TermsWeight(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Action.Flow.Extract ()  -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus )
import Gargantext.Database.Action.Flow.Utils (documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams, ngramsByDoc)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument), HyperdataDocument )
import Gargantext.Database.Admin.Types.Node hiding (DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 )
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode) ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, NgramsId )
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )

------------------------------------------------------------------------
-- Imports for upgrade function
import Gargantext.Database.Query.Tree.Error ( HasTreeError )

------------------------------------------------------------------------

allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs

---------------

-- A Show instance is not possible because 'DataNew' wraps a conduit (IO).
printDataText :: DataText -> IO ()
printDataText (DataOld xs) = putText $ show xs
printDataText (DataNew (maybeInt, conduitData)) = do
  res <- C.runConduit (conduitData .| CL.consume)
  putText $ show (maybeInt, res)


-- TODO use the split parameter in config file
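--
-- A minimal usage sketch (the external API constructor and the query are
-- illustrative assumptions, not fixed by this module):
--
-- @
-- eRes <- getDataText (ExternalOrigin API.PubMed)
--                     (Multi EN)
--                     (API.RawQuery "genomics")
--                     (Just 100)
-- @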
getDataText :: (HasNodeError err)
            => DataOrigin
            -> TermType Lang
            -> API.RawQuery
            -> Maybe API.Limit
            -> DBCmdWithEnv env err (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q li = do
  cfg <- view hasConfig
  eRes <- liftBase $ API.get api (_tt_lang la) q (_ac_epo_api_url $ _gc_apis cfg) li
  pure $ DataNew <$> eRes
getDataText (InternalOrigin _) la q _li = do
  cfg <- view hasConfig
  runDBTx $ do
    (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster (Nothing :: Maybe HyperdataCorpus)
    ids <-  map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q)
    pure $ Right $ DataOld ids

getDataText_Debug :: (HasNodeError err)
                  => DataOrigin
                  -> TermType Lang
                  -> API.RawQuery
                  -> Maybe API.Limit
                  -> DBCmdWithEnv env err ()
getDataText_Debug a l q li = do
  result <- getDataText a l q li
  case result of
    Left  err -> liftBase $ putText $ show err
    Right res -> liftBase $ printDataText res


-------------------------------------------------------------------------------
flowDataText :: forall env err m.
                ( IsDBCmd env err m
                , HasNodeStory env err m
                , MonadLogger m
                , HasNLPServer env
                , HasTreeError err
                , HasValidationError err
                , MonadJobStatus m
                , HasCentralExchangeNotification env
                )
                => User
                -> DataText
                -> TermType Lang
                -> CorpusId
                -> Maybe FlowSocialListWith
                -> JobHandle m
                -> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw _ = do
  $(logLocM) DEBUG $ T.pack $ "Found " <> show (length ids) <> " old node IDs"
  cfg <- view hasConfig
  (_userId, userCorpusId, listId, msgs) <- runDBTx $ do
    (a, user_corpus_id, c, d) <- createNodes cfg (MkCorpusUserNormalCorpusIds u [cid]) corpusType
    _ <- Doc.add user_corpus_id (map nodeId2ContextId ids)
    pure (a, user_corpus_id, c, d)
  forM_ msgs ce_notify
  flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw
  where
    corpusType = Nothing :: Maybe HyperdataCorpus
flowDataText u (DataNew (mLen, txtC)) tt cid mfslw jobHandle = do
  $(logLocM) DEBUG $ T.pack $ "Found " <> show mLen <> " new documents to process"
  for_ (mLen <&> fromInteger) (`addMoreSteps` jobHandle)
  flowCorpus (MkCorpusUserNormalCorpusIds u [cid]) tt mfslw (fromMaybe 0 mLen, transPipe liftBase txtC) jobHandle

------------------------------------------------------------------------
-- TODO use proxy
flowAnnuaire :: ( IsDBCmd env err m
                , HasNodeStory env err m
                , MonadLogger m
                , HasNLPServer env
                , HasTreeError err
                , HasValidationError err
                , MonadJobStatus m
                , HasCentralExchangeNotification env )
             => MkCorpusUser
             -> TermType Lang
             -> FilePath
             -> JobHandle m
             -> m AnnuaireId
flowAnnuaire mkCorpusUser l filePath jobHandle = do
  -- TODO Conduit for file
  docs <- liftBase (readFile_Annuaire filePath :: IO [HyperdataContact])
  flow (Nothing :: Maybe HyperdataAnnuaire) mkCorpusUser l Nothing (fromIntegral $ length docs, yieldMany docs) jobHandle

------------------------------------------------------------------------
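-- | Parse a file and build a corpus out of its documents.
--
-- A minimal usage sketch (the 'FileType' and 'FileFormat' constructors are
-- illustrative assumptions):
--
-- @
-- cid <- flowCorpusFile (MkCorpusUserNormalCorpusIds user [corpusId])
--                       (Multi EN) CSV Plain "docs.csv" Nothing jobHandle
-- @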
flowCorpusFile :: ( IsDBCmd env err m
                  , HasNodeStory env err m
                  , MonadLogger m
                  , HasNLPServer env
                  , HasTreeError err
                  , HasValidationError err
                  , MonadJobStatus m
                  , HasCentralExchangeNotification env )
           => MkCorpusUser
           -> TermType Lang
           -> FileType
           -> FileFormat
           -> FilePath
           -> Maybe FlowSocialListWith
           -> JobHandle m
           -> m CorpusId
flowCorpusFile mkCorpusUser la ft ff fp mfslw jobHandle = do
  eParsed <- liftBase $ parseFile ft ff fp
  case eParsed of
    Right parsed -> do
      flowCorpus mkCorpusUser la mfslw (fromIntegral $ length parsed, yieldMany parsed .| mapC toHyperdataDocument) jobHandle
      --let docs = splitEvery 500 $ take l parsed
      --flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
    Left e       -> panicTrace $ "Error: " <> e

------------------------------------------------------------------------
-- | TODO improve the type needed to create/update a corpus
-- (for now, a 'Maybe' hyperdata is enough)
flowCorpus :: ( IsDBCmd env err m
              , HasNodeStory env err m
              , MonadLogger m
              , HasNLPServer env
              , HasTreeError err
              , HasValidationError err
              , FlowCorpus a
              , MonadJobStatus m
              , HasCentralExchangeNotification env )
           => MkCorpusUser
           -> TermType Lang
           -> Maybe FlowSocialListWith
           -> (Integer, ConduitT () a m ())
           -> JobHandle m
           -> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)


flow :: forall env err m a c.
        ( IsDBCmd env err m
        , HasNodeStory env err m
        , MonadLogger m
        , HasNLPServer env
        , HasTreeError err
        , HasValidationError err
        , FlowCorpus a
        , MkCorpus c
        , MonadJobStatus m
        , HasCentralExchangeNotification env
        )
        => Maybe c
        -> MkCorpusUser
        -> TermType Lang
        -> Maybe FlowSocialListWith
        -> (Integer, ConduitT () a m ())
        -> JobHandle m
        -> m CorpusId
flow c mkCorpusUser la mfslw (count, docsC) jobHandle = do
  cfg <- view hasConfig
  (_userId, userCorpusId, listId, msgs) <- runDBTx $ createNodes cfg mkCorpusUser c
  forM_ msgs ce_notify
  -- TODO if public insertMasterDocs else insertUserDocs
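  -- Tag each document with its 1-based index, then insert in chunks of 5 so
  -- that job progress can be reported after each chunk.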
  runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
    .| CL.chunksOf 5
    .| mapM_C (addDocumentsWithProgress userCorpusId)
    .| sinkNull

  let u = userFromMkCorpusUser mkCorpusUser

  $(logLocM) DEBUG "Calling flowCorpusUser"
  flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw

  where
    addDocumentsWithProgress :: CorpusId -> [(Int, a)] -> m ()
    addDocumentsWithProgress userCorpusId docsChunk = do
      $(logLocM) DEBUG $ T.pack $ "calling insertDoc, ([idx], mLength) = " <> show (fst <$> docsChunk, count)
      docs <- addDocumentsToHyperCorpus c la userCorpusId (map snd docsChunk)
      markProgress (length docs) jobHandle


-- | Given a list of documents and the 'CorpusId' of the target corpus, add
-- the documents to that corpus. Returns the ids of the inserted documents.
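--
-- A minimal usage sketch (values are illustrative):
--
-- @
-- docIds <- addDocumentsToHyperCorpus (Nothing :: Maybe HyperdataCorpus)
--                                     (Multi EN)
--                                     corpusId
--                                     docs
-- @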
addDocumentsToHyperCorpus :: ( IsDBCmd env err m
                             , HasNodeError err
                             , HasNLPServer env
                             , FlowCorpus document
                             , MkCorpus corpus
                             )
                             => Maybe corpus
                             -> TermType Lang
                             -> CorpusId
                             -> [document]
                             -> m [DocId]
addDocumentsToHyperCorpus mb_hyper la corpusId docs = do
  cfg <- view hasConfig
  nlp <- view (nlpServerGet $ _tt_lang la)
  ids <- insertMasterDocs cfg nlp mb_hyper la docs
  runDBTx $ do
    void $ Doc.add corpusId (map nodeId2ContextId ids)
    pure ids

------------------------------------------------------------------------
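-- | Create the user's corpus node together with its mandatory children
-- (a 'NodeTexts', the default list and a 'NodeGraph'). Returns the
-- 'CEMessage's that the caller is expected to dispatch (see the
-- 'ce_notify' calls in 'flow' and 'flowDataText').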
createNodes :: ( HasNodeError err
               , MkCorpus c
               )
            => GargConfig
            -> MkCorpusUser
            -> Maybe c
            -> DBUpdate err (UserId, CorpusId, ListId, [CEMessage])
createNodes cfg mkCorpusUser ctype = do
  -- User Flow
  (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus cfg mkCorpusUser ctype
  -- NodeTexts is first
  _tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
  -- printDebug "NodeTexts: " tId

  -- NodeList is second
  listId <- getOrMkList userCorpusId userId

  -- User Graph Flow
  _ <- insertDefaultNodeIfNotExists NodeGraph     userCorpusId userId
  -- _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId

  let msgs = [UpdateTreeFirstLevel listId, UpdateTreeFirstLevel userCorpusId]

  pure (userId, userCorpusId, listId, msgs)


flowCorpusUser :: ( HasNodeError err
                  , HasValidationError err
                  , HasNLPServer env
                  , HasTreeError err
                  , HasNodeStory env err m
                  , MkCorpus c
                  )
               => Lang
               -> User
               -> CorpusId
               -> ListId
               -> Maybe c
               -> Maybe FlowSocialListWith
               -> m CorpusId
flowCorpusUser l user userCorpusId listId ctype mfslw = do
  env <- view hasNodeStory
  buildSocialList l user userCorpusId listId ctype mfslw
  runDBTx $ do
    -- _ <- insertOccsUpdates userCorpusId mastListId
    --_ <- mkPhylo  userCorpusId userId
    -- Annuaire Flow
    -- _ <- mkAnnuaire  rootUserId userId
    _ <- reIndexWith env userCorpusId listId NgramsTerms (Set.singleton MapTerm)
    _ <- updateContextScore      env userCorpusId listId
    _ <- updateNgramsOccurrences env userCorpusId listId

    pure userCorpusId


-- | This function is responsible for constructing the social list of terms.
-- When the social list is explicitly disabled ('NoList'), nothing is built.
buildSocialList :: ( HasNodeError err
                   , HasValidationError err
                   , HasTreeError err
                   , HasNodeStory env err m
                   , MkCorpus c
                   , HasNLPServer env
                   )
                => Lang
                -> User
                -> CorpusId
                -> ListId
                -> Maybe c
                -> Maybe FlowSocialListWith
                -> m ()
buildSocialList l user userCorpusId listId ctype = \case
  Just (NoList _) -> pure ()
  mfslw -> do
    cfg <- view hasConfig
    nlpServer <- view (nlpServerGet l)
    (masterUserId, masterCorpusId, ngs) <- runDBTx $ do
      -- User List Flow
      (master_user_id, _masterRootId, master_corpus_id)
        <- getOrMkRootWithCorpus cfg MkCorpusUserMaster ctype

      let gp = GroupWithPosTag l nlpServer HashMap.empty
      (master_user_id, master_corpus_id,) <$> buildNgramsLists user userCorpusId master_corpus_id mfslw gp

    -- printDebug "flowCorpusUser:ngs" ngs

    _userListId <- flowList_DbRepo listId ngs
    _mastListId <- runDBTx $ getOrMkList masterCorpusId masterUserId
    pure ()


-- FIXME(adn): the use of 'extractNgramsT' is iffy and problematic -- we shouldn't
-- be contacting the NLP server in the middle of some DB ops! we should extract
-- the tokens /before/ inserting things into the DB.
insertMasterDocs :: ( HasNodeError err
                    , FlowCorpus a
                    , MkCorpus   c
                    , IsDBCmd env err m
                    )
                 => GargConfig
                 -> NLPServerConfig
                 -> Maybe c
                 -> TermType Lang
                 -> [a]
                 -> m [DocId]
insertMasterDocs cfg nlpServer c lang hs  =  do

  (masterUserId, masterCorpusId, documentsWithId, ids') <- runDBTx $ do
    (master_user_id, _, master_corpus_id) <- getOrMkRootWithCorpus cfg MkCorpusUserMaster c
    (ids_prime, documents_with_id) <- insertDocs master_user_id master_corpus_id (map (toNode master_user_id Nothing) hs )
    _ <- Doc.add master_corpus_id ids_prime
    pure (master_user_id, master_corpus_id, documents_with_id, ids_prime)

  -- TODO
  -- create a corpus with database name (CSV or PubMed)
  -- add documents to the corpus (create node_node link)
  -- this will enable global database monitoring

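  -- Extract the ngrams (contacting the NLP server) and regroup them as:
  -- extracted ngram -> ngrams type -> document node -> (weight, count).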
  mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
                <- mapNodeIdNgrams
                <$> documentIdWithNgrams
                      (extractNgramsT nlpServer $ withLang lang documentsWithId)
                      (map (B.first contextId2NodeId) documentsWithId)

  runDBTx $ do
    lId <- getOrMkList masterCorpusId masterUserId
    _ <- saveDocNgramsWith lId mapNgramsDocs'
    pure $ map contextId2NodeId ids'


saveDocNgramsWith :: ListId
                  -> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
                  -> DBUpdate err ()
saveDocNgramsWith lId mapNgramsDocs' = do
  (terms2id :: HashMap.HashMap Text NgramsId) <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'

  let mapNgramsDocs :: HashMap.HashMap Ngrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
      mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'

  -- new
  mapCgramsId <- listInsertDb lId toNodeNgramsW'
               $ map (bimap _ngramsTerms Map.keys)
               $ HashMap.toList mapNgramsDocs

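  -- Flatten the nested maps into one row per (ngram, type, context); rows whose
  -- ngrams id cannot be resolved ('getCgramsId' returning 'Nothing') are
  -- dropped by 'catMaybes'.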
  let ngrams2insert =  catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId)
                                            <$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
                                            <*> Just (fromIntegral $ unTermsWeight w :: Double)
                       | (terms'', mapNgramsTypes)      <- HashMap.toList mapNgramsDocs
                       , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
                       , (nId, (w, _cnt))               <- Map.toList mapNodeIdWeight
                       ]
  _return <- insertContextNodeNgrams2 ngrams2insert

  -- to be removed
  _   <- insertDocNgrams lId $ HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs

  pure ()


------------------------------------------------------------------------

-- | Re-index documents of a corpus with ngrams in the list
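--
-- A typical call (mirroring 'flowCorpusUser') runs inside a 'runDBTx'
-- transaction:
--
-- @
-- runDBTx $ reIndexWith env cId lId NgramsTerms (Set.singleton MapTerm)
-- @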
reIndexWith :: HasNodeError err
            => NodeStoryEnv err
            -> CorpusId
            -> ListId
            -> NgramsType
            -> Set ListType
            -> DBUpdate err ()
reIndexWith env cId lId nt lts = do
  -- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
  corpus_node <- getNodeWith cId (Proxy @HyperdataCorpus)
  -- NOTE: This assumes a single language for the whole corpus
  let corpusLang = withDefaultLanguage $ view (node_hyperdata . to _hc_lang) corpus_node

  -- Getting [NgramsTerm]
  (ts :: [NgramsTerm]) <- List.concat
     . map (\(k, vs) -> k:vs)   -- concatenate each parent with its children, 1st level only
     . HashMap.toList
     <$> getTermsWith env identity [lId] nt lts

  -- Get all documents of the corpus
  (docs :: [ContextOnlyId HyperdataDocument]) <- selectDocNodesOnlyId cId

  -- Saving the indexation in database
  mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs