{-|
Module      : Gargantext.API.Corpus.New
Description : New corpus API
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

New corpus means either:
- new corpus
- new data in existing corpus
-}

{-# LANGUAGE NoImplicitPrelude  #-}
{-# LANGUAGE TemplateHaskell    #-}
{-# LANGUAGE DeriveGeneric      #-}
{-# LANGUAGE DataKinds          #-}
{-# LANGUAGE TypeOperators      #-}
{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE FlexibleContexts   #-}
{-# LANGUAGE RankNTypes         #-}

module Gargantext.API.Corpus.New
      where

--import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat(..))
import Control.Lens hiding (elements)
import Control.Monad.IO.Class (liftIO)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Corpus.New.File
import Gargantext.API.Orchestrator.Types
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
import Gargantext.Database.Types.Node (UserId)
import Gargantext.Prelude
import Gargantext.Text.Corpus.Parsers.CSV (parseCsv', parseHal')
import Gargantext.Text.Terms (TermType(..))
import Servant
import Servant.API.Flatten (Flat)
import Servant.Job.Core
import Servant.Job.Types
import Servant.Job.Utils (jsonOptions)
import Servant.Multipart
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Web.FormUrlEncoded          (FromForm)
import qualified Gargantext.Text.Corpus.API as API

data Query = Query { query_query      :: Text
                   , query_corpus_id  :: Int
                   , query_databases  :: [API.ExternalAPIs]
                   }
                   deriving (Eq, Show, Generic)

deriveJSON (unPrefix "query_") 'Query


instance Arbitrary Query where
    arbitrary = elements [ Query q n fs
                         | q <- ["a","b"]
                         , n <- [0..10]
                         , fs <- take 3 $ repeat API.externalAPIs
                         ]

instance ToSchema Query where
  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")

type Api = Summary "New Corpus endpoint"
         :> ReqBody '[JSON] Query
         :> Post '[JSON] CorpusId
        :<|> Get '[JSON] ApiInfo

-- | TODO manage several apis
-- TODO-ACCESS
-- TODO this is only the POST
api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
api _uId (Query q _ as) = do
  cId <- case head as of
    Nothing      -> flowCorpusSearchInDatabase "user1" EN q
    Just API.All -> flowCorpusSearchInDatabase "user1" EN q
    Just a   -> do
      docs <- liftIO $ API.get a q (Just 1000)
      cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
      pure cId'

  pure cId

------------------------------------------------
data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
  deriving (Generic)
instance Arbitrary ApiInfo where
  arbitrary = ApiInfo <$> arbitrary

deriveJSON (unPrefix "") 'ApiInfo

instance ToSchema ApiInfo

info :: FlowCmdM env err m => UserId -> m ApiInfo
info _u = pure $ ApiInfo API.externalAPIs

------------------------------------------------------------------------
------------------------------------------------------------------------
data WithQuery = WithQuery
  { _wq_query     :: !Text
  , _wq_databases :: ![ExternalAPIs]
  , _wq_lang      :: !(Maybe Lang)
  }
  deriving Generic

makeLenses ''WithQuery
instance FromJSON WithQuery where
  parseJSON = genericParseJSON $ jsonOptions "_wq_"
instance ToSchema WithQuery where
  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wq_")

-------------------------------------------------------
data WithForm = WithForm
  { _wf_filetype :: !FileType
  , _wf_data     :: !Text
  } deriving (Eq, Show, Generic)

makeLenses ''WithForm
instance FromForm WithForm
instance FromJSON WithForm where
  parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToSchema WithForm where
  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")

------------------------------------------------------------------------
type AsyncJobs event ctI input output =
  Flat (AsyncJobsAPI' 'Unsafe 'Safe ctI '[JSON] Maybe event input output)
------------------------------------------------------------------------

type Upload = Summary "Corpus Upload endpoint"
   :> "corpus"
   :> Capture "corpus_id" CorpusId
    :<|> "addWithquery" :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus
    :<|> "addWithfile"  :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus


type AddWithQuery = Summary "Add with Query to corpus endpoint"
   :> "corpus"
   :> Capture "corpus_id" CorpusId
   :> "add"
   :> "query"
   :> "async"
   :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus

type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
   :> "corpus"
   :> Capture "corpus_id" CorpusId
   :> "add" 
   :> "file"
   :> MultipartForm Mem (MultipartData Mem)
   :> QueryParam "fileType"  FileType
   :> "async"
   :> AsyncJobs ScraperStatus '[JSON] () ScraperStatus

type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
   :> "corpus"
   :> Capture "corpus_id" CorpusId
   :> "add"
   :> "form"
   :> "async"
   :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus

------------------------------------------------------------------------
-- TODO WithQuery also has a corpus id
addToCorpusJobFunction :: FlowCmdM env err m
                       => CorpusId
                       -> WithQuery
                       -> (ScraperStatus -> m ())
                       -> m ScraperStatus
addToCorpusJobFunction _cid (WithQuery _q _dbs _l) logStatus = do
  -- TODO ...
  logStatus ScraperStatus { _scst_succeeded = Just 10
                          , _scst_failed    = Just 2
                          , _scst_remaining = Just 138
                          , _scst_events    = Just []
                          }
  -- TODO ...
  pure      ScraperStatus { _scst_succeeded = Just 137
                          , _scst_failed    = Just 13
                          , _scst_remaining = Just 0
                          , _scst_events    = Just []
                          }


addToCorpusWithFile :: FlowCmdM env err m
                    => CorpusId
                    -> MultipartData Mem
                    -> Maybe FileType
                    -> (ScraperStatus -> m ())
                    -> m ScraperStatus
addToCorpusWithFile cid input filetype logStatus = do
  logStatus ScraperStatus { _scst_succeeded = Just 10
                          , _scst_failed    = Just 2
                          , _scst_remaining = Just 138
                          , _scst_events    = Just []
                          }
  _h <- postUpload cid filetype input

  pure      ScraperStatus { _scst_succeeded = Just 137
                          , _scst_failed    = Just 13
                          , _scst_remaining = Just 0
                          , _scst_events    = Just []
                          }

addToCorpusWithForm :: FlowCmdM env err m
                    => CorpusId
                    -> WithForm
                    -> (ScraperStatus -> m ())
                    -> m ScraperStatus
addToCorpusWithForm cid (WithForm ft d) logStatus = do

  printDebug "ft" ft

  let
    parse = case ft of
      CSV_HAL -> parseHal'
      CSV     -> parseCsv'
      _       -> parseHal'

    docs = splitEvery 500
           $ take 1000000
           $ parse (cs d)

  logStatus ScraperStatus { _scst_succeeded = Just 1
                          , _scst_failed    = Just 0
                          , _scst_remaining = Just 1
                          , _scst_events    = Just []
                          }

  cid' <- flowCorpus "user1" (Right [cid]) (Multi EN) (map (map toHyperdataDocument) docs)
  printDebug "cid'" cid' 

  pure      ScraperStatus { _scst_succeeded = Just 2
                          , _scst_failed    = Just 0
                          , _scst_remaining = Just 0
                          , _scst_events    = Just []
                          }