{-| Module : Gargantext.API.Node.Document.Export Description : Document export Copyright : (c) CNRS, 2017-Present License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX -} module Gargantext.API.Node.Document.Export ( documentExportAPI -- * Internals , get_document_json ) where import Control.Lens (view) import Data.ByteString.Lazy.Char8 qualified as BSC import Data.Csv (encodeDefaultOrderedByNameWith, defaultEncodeOptions, encDelimiter, encQuoting, Quoting(..)) import Data.Text qualified as T import Data.Text.Encoding qualified as TE import Data.Time.Clock.System (getSystemTime, systemSeconds) import Data.Time.LocalTime (getCurrentTimeZone, TimeZone (timeZoneMinutes)) import Data.Version (showVersion) import Gargantext.API.Node.Document.Export.Types import Gargantext.API.Prelude (IsGargServer) import Gargantext.API.Routes.Named.Document qualified as Named import Gargantext.Core (toDBid) import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..)) import Gargantext.Database.Prelude import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..)) import Gargantext.Database.Query.Table.Node (getClosestParentIdByType) import Gargantext.Database.Query.Table.Node.User ( getNodeUser ) import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id) import Gargantext.Prelude import Paths_gargantext qualified as PG -- cabal magic build module import Servant ( addHeader, Header, Headers(getResponse) ) import Servant.Server.Generic (AsServerT) documentExportAPI :: IsGargServer env err m => NodeId -- ^ The ID of the target user -> DocId -> Named.DocumentExportAPI (AsServerT m) documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExportEndpoints { exportJSONEp = getDocumentsJSON userNodeId dId , exportJSONZipEp = getDocumentsJSONZip userNodeId dId , exportTSVEp = getDocumentsTSV userNodeId dId } -------------------------------------------------- -- | Hashes are ordered by Set getDocumentsJSON :: IsGargServer env err m => NodeId -- ^ The ID of the target user -> DocId -> m (Headers '[Header "Content-Disposition" T.Text] DocumentExport) getDocumentsJSON nodeUserId pId = do dexp <- get_document_json nodeUserId pId pure $ addHeader (T.concat [ "attachment; filename=" , "GarganText_DocsList-" , T.pack (show pId) , ".json" ]) dexp get_document_json :: IsGargServer err env m => NodeId -> DocId -> m DocumentExport get_document_json nodeUserId pId = do runDBQuery $ do uId <- view node_user_id <$> getNodeUser nodeUserId mcId <- getClosestParentIdByType pId NodeCorpus let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing pure DocumentExport { _de_documents = mapFacetDoc uId <$> docs , _de_garg_version = T.pack $ showVersion PG.version } where mapFacetDoc uId (FacetDoc { .. }) = Document { _d_document = Node { _node_id = facetDoc_id , _node_hash_id = Nothing , _node_typename = toDBid NodeDocument , _node_user_id = uId , _node_parent_id = Nothing , _node_name = facetDoc_title , _node_date = facetDoc_created , _node_hyperdata = facetDoc_hyperdata } , _d_ngrams = Ngrams { _ng_ngrams = [] , _ng_hash = "" } , _d_hash = "" } _mapDoc d = Document { _d_document = d , _d_ngrams = Ngrams { _ng_ngrams = [] , _ng_hash = "" } , _d_hash = ""} getDocumentsJSONZip :: IsGargServer env err m => NodeId -- ^ The Node ID of the target user -> DocId -> m (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document] getDocumentsJSONZip userNodeId pId = do dJSON <- getDocumentsJSON userNodeId pId systime <- liftBase getSystemTime tz <- liftBase getCurrentTimeZone let dexp = getResponse dJSON let dexpz = DocumentExportZIP { _dez_dexp = dexp , _dez_doc_id = pId -- see https://github.com/jgm/zip-archive/commit/efe4423a9a2b1dc2a4d413917a933828d3f8dc0f , _dez_last_modified = fromIntegral (systemSeconds systime) + fromIntegral (timeZoneMinutes tz * 60) } pure $ addHeader (T.concat [ "attachment; filename=" , dezFileName dexpz , ".zip" ]) dexpz getDocumentsTSV :: IsGargServer err env m => NodeId -- ^ The Node ID of the target user -> DocId -> m (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document] getDocumentsTSV userNodeId pId = do dJSON <- getDocumentsJSON userNodeId pId let DocumentExport { _de_documents } = getResponse dJSON let ret = TE.decodeUtf8 $ BSC.toStrict $ encodeDefaultOrderedByNameWith (defaultEncodeOptions {encDelimiter = fromIntegral $ ord '\t', encQuoting = QuoteAll }) _de_documents pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-" , T.pack $ show pId , ".tsv"]) ret