{-|
Module      : Gargantext.API.Node.Corpus.Export.Utils
Description : Corpus export
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# LANGUAGE TypeApplications #-}

module Gargantext.API.Node.Corpus.Export.Utils
  where

import Control.Exception.Safe qualified as CES
import Control.Monad.Fail (fail)
import Data.Aeson qualified as Aeson
import Data.ByteString.Lazy qualified as BSL
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Data.Time.Clock (getCurrentTime, UTCTime)
import Data.Time.Format.ISO8601 (iso8601ParseM, iso8601Show)
import Data.Version (parseVersion, showVersion, Version)
import Database.SQLite.Simple qualified as S
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(..) )
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..), CorpusSQLiteData(..))
import Gargantext.Core.NodeStory.Types ( HasNodeStoryEnv, NodeListStory )
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types (CorpusId, ListId, NodeType(NodeList))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.List (HyperdataList)
import Gargantext.Database.Admin.Types.Node (unNodeId, ContextId(..), NodeId(UnsafeMkNodeId))
import Gargantext.Database.Prelude (DBCmd, IsDBCmd)
import Gargantext.Database.Schema.Context (context_id, context_name, context_date, context_hyperdata) 
import Gargantext.Database.Schema.Node (node_hash_id, node_hyperdata, node_name, node_parent_id)
import Gargantext.Database.Query.Table.Node ( defaultList, getNodeWith )
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Prelude
import Paths_gargantext qualified as PG -- cabal magic build module
import Prelude qualified
import System.Directory (removeDirectoryRecursive)
import System.IO.Temp (createTempDirectory, getCanonicalTemporaryDirectory)
import Text.ParserCombinators.ReadP (readP_to_S)



getContextNgrams :: HasNodeError err
                 => CorpusId
                 -> ListId
                 -> ListType
                 -> NgramsType
                 -> NodeListStory
                 -> DBCmd err (Map ContextId (Set NgramsTerm))
getContextNgrams cId lId listType nt repo = do
--  lId <- case lId' of
--    Nothing -> defaultList cId
--    Just  l -> pure l

  lIds <- selectNodesWithUsername NodeList userMaster
  let ngs = filterListWithRoot [listType] $ mapTermListRoot [lId] nt repo
  -- TODO HashMap
  getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)


mkCorpusSQLiteData :: ( CES.MonadMask m
                      , HasNodeStoryEnv env
                      , HasNodeError err
                      , IsDBCmd env err m )
                   => CorpusId
                   -> Maybe ListId
                   -> m CorpusSQLiteData
mkCorpusSQLiteData cId lId = do
  corpus <- getNodeWith cId (Proxy @HyperdataCorpus)
  now <- liftBase getCurrentTime
  
  listId <- case lId of
        Nothing -> defaultList cId
        Just l  -> pure l

  l <- getNodeWith listId (Proxy @HyperdataList)

  repo <- getRepo [listId]
  let nt = NgramsTerms
  mapNgs  <- getContextNgrams cId listId MapTerm nt repo
  stopNgs  <- getContextNgrams cId listId StopTerm nt repo
  candidateNgs  <- getContextNgrams cId listId CandidateTerm nt repo
  docs <- selectDocNodes cId

  pure $ CorpusSQLiteData { _csd_version = PG.version
                          , _csd_cId     = cId
                          , _csd_lId     = listId
                          , _csd_created = now
                          
                          , _csd_corpus_name = corpus ^. node_name
                          , _csd_corpus_hash = corpus ^. node_hash_id
                          , _csd_corpus_parent = corpus ^. node_parent_id
                          , _csd_corpus_hyperdata = corpus ^. node_hyperdata

                          , _csd_list_name = l ^. node_name
                          , _csd_list_parent = l ^. node_parent_id
                          , _csd_list_hyperdata = l ^. node_hyperdata

                          , _csd_contexts = (\c -> ( c ^. context_id
                                                   , c ^. context_name
                                                   , c ^. context_date
                                                   , c ^. context_hyperdata)) <$> docs

                          , _csd_map_context_ngrams = mapNgs
                          , _csd_stop_context_ngrams = stopNgs
                          , _csd_candidate_context_ngrams = candidateNgs
                          }


mkCorpusSQLite :: ( CES.MonadMask m
                  , MonadBase IO m )
               => CorpusSQLiteData
               -> m CorpusSQLite
mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpath) -> liftBase $ do
  putText $ "[mkCorpusSQLite] listId: " <> show _csd_lId
  putText $ "[mkCorpusSQLite] fp: " <> show fp

  S.withConnection fpath $ \conn -> do
    -- better performance
    -- https://kerkour.com/sqlite-for-servers
    S.execute_ conn "PRAGMA journal_mode = WAL"
    
    S.execute_ conn "CREATE TABLE info (key, value);"
    S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version)
    S.execute conn "INSERT INTO info (key, value) VALUES ('corpusId', ?)" (S.Only $ unNodeId _csd_cId)
    S.execute conn "INSERT INTO info (key, value) VALUES ('listId', ?)" (S.Only $ unNodeId _csd_lId)
    S.execute conn "INSERT INTO info (key, value) VALUES ('created', datetime(?))" (S.Only $ iso8601Show _csd_created)

    S.execute_ conn "CREATE TABLE corpus (id, name, hash, parent_id, hyperdata)"
    S.execute conn "INSERT INTO corpus (id, name, hash, parent_id, hyperdata) VALUES (?, ?, ?, ?, ?)"
      ( unNodeId _csd_cId
      , _csd_corpus_name
      , _csd_corpus_hash
      , unNodeId <$> _csd_corpus_parent
      , Aeson.encode _csd_corpus_hyperdata )

    S.execute_ conn "CREATE TABLE lists (id, name, parent_id, hyperdata)"
    S.execute conn "INSERT INTO lists (id, name, parent_id, hyperdata) VALUES (?, ?, ?, ?)"
      ( unNodeId _csd_lId
      , _csd_list_name
      , unNodeId <$> _csd_list_parent
      , Aeson.encode _csd_list_hyperdata )

    S.execute_ conn "CREATE TABLE ngrams (context_id, terms, type_)"
    let insertTerms ngs type_ = do
                      let ngs' = concatMap (\(ctxId, ngrams) -> (\n -> (_ContextId ctxId, unNgramsTerm n, type_)) <$> Set.toList ngrams) (Map.toList ngs)
                      S.executeMany conn "INSERT INTO ngrams (context_id, terms, type_) VALUES (?, ?, ?)" ngs'
    insertTerms _csd_map_context_ngrams ("map" :: Text)
    insertTerms _csd_stop_context_ngrams ("stop" :: Text)
    insertTerms _csd_candidate_context_ngrams ("candidate" :: Text)

    S.execute_ conn "CREATE TABLE documents (context_id, name, date, hyperdata)"
    S.executeMany conn "INSERT INTO documents (context_id, name, date, hyperdata) VALUES (?, ?, date(?), ?)"
      ((\(ctxId, ctxName, ctxDate, ctxHyperdata) -> ( unNodeId ctxId
                                                    , ctxName
                                                    , iso8601Show ctxDate
                                                    , Aeson.encode ctxHyperdata )) <$> _csd_contexts)

  bsl <- BSL.readFile fpath
  
  pure $ CorpusSQLite { _cs_bs = bsl }


readCorpusSQLite :: ( CES.MonadMask m
                    , MonadBase IO m )
                 => CorpusSQLite
                 -> m (Either Text CorpusSQLiteData)
readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname, fpath) -> liftBase $ do
  (info, corpusData, listData, documents, ngrams) <- S.withConnection fpath $ \conn -> do
    [S.Only version] <- S.query_ conn "SELECT value FROM info WHERE key = 'gargVersion'"
    [S.Only cId]     <- S.query_ conn "SELECT value FROM info WHERE key = 'corpusId'"
    [S.Only lId]     <- S.query_ conn "SELECT value FROM info WHERE key = 'listId'"
    [S.Only created] <- S.query_ conn "SELECT value FROM info WHERE key = 'created'"
    let info = (version, cId, lId, created)

    [corpusData] <- S.query_ conn "SELECT name, hash, parent_id, hyperdata FROM corpus"
    [listData]   <- S.query_ conn "SELECT name, parent_id, hyperdata FROM lists"
    documents    <- S.query_ conn "SELECT context_id, name, date, hyperdata FROM documents"
    ngrams       <- S.query_ conn "SELECT context_id, terms, type_ FROM ngrams"

    pure (info, corpusData, listData, documents, ngrams)

  let (version, cId, lId, created) = info
  let (_csd_corpus_name, _csd_corpus_hash, corpusParent, corpusHyperdata) = corpusData
  let (_csd_list_name, listParent, listHyperdata) = listData

  let (context_errors, _csd_contexts) = partitionEithers (parseCtx <$> documents)

  let (ngrams_errors, ngrams_) = partitionEithers (parseNgrams <$> ngrams)

  -- NOTE To make things simpler, use the 'Either Text CorpusSQLData' as a monad
  --      (there's quire a few things to test here)
  pure $ do
    _csd_version <- eParseVersion version
    _csd_created <- maybeToEither ("Incorrect created: " <> T.pack created) $ iso8601ParseM created
    _csd_corpus_hyperdata <- first T.pack $ Aeson.eitherDecode corpusHyperdata
    _csd_list_hyperdata <- first T.pack $ Aeson.eitherDecode listHyperdata
    
    unless (null context_errors) $ fail $ "Context errors: " <> show context_errors
    unless (null ngrams_errors) $ fail $ "Ngrams errors: " <> show ngrams_errors 

    let _csd_cId = UnsafeMkNodeId cId
    let _csd_lId = UnsafeMkNodeId lId

    let _csd_corpus_parent = UnsafeMkNodeId <$> corpusParent

    let _csd_list_parent = UnsafeMkNodeId <$> listParent

    let _csd_map_context_ngrams = filterNgrams MapTerm ngrams_
    let _csd_stop_context_ngrams = filterNgrams StopTerm ngrams_
    let _csd_candidate_context_ngrams = filterNgrams CandidateTerm ngrams_
    
    pure $ CorpusSQLiteData { .. }

  where
    eParseVersion :: Prelude.String -> Either Text Version
    eParseVersion v = case readP_to_S parseVersion v of
      [(v_, _)] -> Right v_
      _         -> Left ("Incorrect version: " <> T.pack v)
    
    parseCtx :: (Int, Text, Prelude.String, BSL.ByteString)
             -> Either Text (NodeId, Text, UTCTime, HyperdataDocument)
    parseCtx (ctxId, name, date, hd) =
      case ( iso8601ParseM date, Aeson.decode hd ) of
        ( Just d, Just h ) -> Right ( UnsafeMkNodeId ctxId, name, d, h )
        _                  -> Left ("Context " <> show ctxId <> " parse error" :: Text)

    parseNgrams :: (Int, Text, Text) -> Either Text (ListType, (ContextId, NgramsTerm))
    parseNgrams (ctxId, term, type_) =
      case type_ of
        "map"       -> Right ( MapTerm, ( UnsafeMkContextId ctxId, NgramsTerm term ) )
        "stop"      -> Right ( StopTerm, ( UnsafeMkContextId ctxId, NgramsTerm term ) )
        "candidate" -> Right ( CandidateTerm, ( UnsafeMkContextId ctxId, NgramsTerm term ) )
        _           -> Left ("Unknown term " <> term)

    filterNgrams :: ListType -> [(ListType, (ContextId, NgramsTerm))] -> Map ContextId (Set NgramsTerm)
    filterNgrams lt ngrams_ = Map.fromListWith (<>) $
      map (\(_, (ctxId, term)) -> (ctxId, Set.singleton term))
          (filter (\(lt_, _) -> lt == lt_) ngrams_)
      

withTempSQLiteDir :: (CES.MonadMask m, MonadBase IO m)
                  => ((FilePath, Prelude.String, FilePath) -> m a)
                  -> m a
withTempSQLiteDir = CES.bracket setup tearDown
  where
    setup = do
      tmpDir <- liftBase getCanonicalTemporaryDirectory
      fp <- liftBase $ createTempDirectory tmpDir "sqlite" 
      let fname = "gargantext.sqlite"
      let fpath = fp <> "/" <> fname

      pure (fp, fname, fpath)
    tearDown (fp, _fname, _fpath) = do
      liftBase $ removeDirectoryRecursive fp
