1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
{-|
Module : Gargantext.API.Node.Document.Export
Description : Document export
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.Node.Document.Export
where
import Control.Lens (view)
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByNameWith, defaultEncodeOptions, encDelimiter, encQuoting, Quoting(..))
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Time.Clock.System (getSystemTime, systemSeconds)
import Data.Time.LocalTime (getCurrentTimeZone, TimeZone (timeZoneMinutes))
import Data.Version (showVersion)
import Gargantext.API.Node.Document.Export.Types
import Gargantext.API.Prelude (GargNoServer, IsGargServer)
import Gargantext.API.Routes.Named.Document qualified as Named
import Gargantext.Core (toDBid)
import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id)
import Gargantext.Prelude
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant ( addHeader, Header, Headers(getResponse) )
import Servant.Server.Generic (AsServerT)
documentExportAPI :: IsGargServer env err m
=> NodeId
-- ^ The ID of the target user
-> DocId
-> Named.DocumentExportAPI (AsServerT m)
documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExportEndpoints
{ exportJSONEp = getDocumentsJSON userNodeId dId
, exportJSONZipEp = getDocumentsJSONZip userNodeId dId
, exportTSVEp = getDocumentsTSV userNodeId dId
}
--------------------------------------------------
-- | Hashes are ordered by Set
getDocumentsJSON :: NodeId
-- ^ The ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExport)
getDocumentsJSON nodeUserId pId = do
uId <- view node_user_id <$> getNodeUser nodeUserId
mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
pure $ addHeader (T.concat [ "attachment; filename="
, "GarganText_DocsList-"
, T.pack (show pId)
, ".json" ]) dexp
where
mapFacetDoc uId (FacetDoc { .. }) =
Document { _d_document =
Node { _node_id = facetDoc_id
, _node_hash_id = Nothing
, _node_typename = toDBid NodeDocument
, _node_user_id = uId
, _node_parent_id = Nothing
, _node_name = facetDoc_title
, _node_date = facetDoc_created
, _node_hyperdata = facetDoc_hyperdata }
, _d_ngrams = Ngrams { _ng_ngrams = []
, _ng_hash = "" }
, _d_hash = "" }
_mapDoc d = Document { _d_document = d
, _d_ngrams = Ngrams { _ng_ngrams = []
, _ng_hash = "" }
, _d_hash = ""}
getDocumentsJSONZip :: NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
getDocumentsJSONZip userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
systime <- liftBase getSystemTime
tz <- liftBase getCurrentTimeZone
let dexp = getResponse dJSON
let dexpz = DocumentExportZIP { _dez_dexp = dexp
, _dez_doc_id = pId
-- see https://github.com/jgm/zip-archive/commit/efe4423a9a2b1dc2a4d413917a933828d3f8dc0f
, _dez_last_modified = fromIntegral (systemSeconds systime) +
fromIntegral (timeZoneMinutes tz * 60) }
pure $ addHeader (T.concat [ "attachment; filename="
, dezFileName dexpz
, ".zip" ]) dexpz
getDocumentsTSV :: NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
getDocumentsTSV userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
let DocumentExport { _de_documents } = getResponse dJSON
let ret = TE.decodeUtf8 $ BSC.toStrict $ encodeDefaultOrderedByNameWith (defaultEncodeOptions {encDelimiter = fromIntegral $ ord '\t', encQuoting = QuoteAll }) _de_documents
pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-"
, T.pack $ show pId
, ".tsv"])
ret