{-|
Module      : Gargantext.Core.Text.List.Formats.TSV
Description : 
Copyright   : (c) CNRS, 2018-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

TSV parser for Gargantext corpus files.

-}


module Gargantext.Core.Text.List.Formats.TSV where

import Control.Applicative
import Data.ByteString.Lazy qualified as BL
import Data.Csv
import Data.Text (pack)
import Data.Text qualified as DT
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length)

------------------------------------------------------------------------
tsvMapTermList :: FilePath -> IO TermList
tsvMapTermList fp = tsv2list TsvMap <$> snd <$>  fromTsvListFile fp

tsv2list :: TsvListType -> Vector TsvList -> TermList
tsv2list lt vs = V.toList $ V.map (\(TsvList _ label forms)
                           -> (DT.words label, [DT.words label] <> (filter (not . null) . map DT.words $ DT.splitOn tsvListFormsDelimiter forms)))
                         $ V.filter (\l -> tsvList_status l == lt ) vs

------------------------------------------------------------------------
data TsvListType = TsvMap | TsvStop | TsvCandidate
  deriving (Read, Show, Eq)
------------------------------------------------------------------------
-- TSV List Main Configuration
tsvListFieldDelimiter :: Char
tsvListFieldDelimiter = '\t'

tsvListFormsDelimiter :: Text
tsvListFormsDelimiter = "|&|"
------------------------------------------------------------------------
data TsvList = TsvList
    { tsvList_status :: !TsvListType
    , tsvList_label  :: !Text
    , tsvList_forms  :: !Text
    }
    deriving (Show)
------------------------------------------------------------------------
instance FromNamedRecord TsvList where
  parseNamedRecord r = TsvList <$> r .: "status"
                               <*> r .: "label"
                               -- Issue #381: be lenient in the forms
                               -- field, if missing, default to the empty text.
                               <*> (fromMaybe mempty <$> r .: "forms")

instance ToNamedRecord TsvList where
  toNamedRecord (TsvList s l f) =
    namedRecord [ "status" .= s
                , "label"  .= l
                , "forms"  .= f
                ]
------------------------------------------------------------------------
instance FromField TsvListType where
    parseField "map"       = pure TsvMap
    parseField "main"      = pure TsvCandidate
    parseField "candidate" = pure TsvCandidate -- backward compat
    parseField "stop"      = pure TsvStop
    parseField _           = mzero

instance ToField TsvListType where
    toField TsvMap       = "map"
    toField TsvCandidate = "main"
    toField TsvStop      = "stop"
------------------------------------------------------------------------
tsvDecodeOptions :: DecodeOptions
tsvDecodeOptions = (defaultDecodeOptions
                      {decDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
                    )

tsvEncodeOptions :: EncodeOptions
tsvEncodeOptions = ( defaultEncodeOptions 
                      {encDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
                    )
------------------------------------------------------------------------
fromTsvListFile :: FilePath -> IO (Header, Vector TsvList)
fromTsvListFile fp = do
    tsvData <- BL.readFile fp
    case decodeByNameWith tsvDecodeOptions tsvData of
      Left e        -> panicTrace (pack e)
      Right tsvList -> pure tsvList
------------------------------------------------------------------------
toTsvListFile :: FilePath -> (Header, Vector TsvList) -> IO ()
toTsvListFile fp (h, vs) = BL.writeFile fp $
                      encodeByNameWith tsvEncodeOptions h (V.toList vs)
------------------------------------------------------------------------
