[export] allow to export a zipped JSON

parent 1a806995
Pipeline #5686 passed with stages
in 93 minutes and 48 seconds
......@@ -630,6 +630,7 @@ library
, xml-types ^>= 0.3.8
, yaml ^>= 0.11.8.0
, zip ^>= 1.7.2
, zip-archive ^>= 0.4.3
, zlib ^>= 0.6.2.3
executable gargantext-admin
......
......@@ -55,16 +55,18 @@ import Servant
------------------------------------------------------------------------
type GETAPI = Summary "Get List"
:> "lists"
:> Capture "listId" ListId
:> "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "lists"
:> "lists"
:> Capture "listId" ListId
:> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
:> ( "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "json.zip"
:> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
:<|> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap) )
getApi :: GargServer GETAPI
getApi = getJson :<|> getCsv
getApi listId = getJson listId
:<|> getJsonZip listId
:<|> getCsv listId
--
-- JSON API
......@@ -94,6 +96,18 @@ getJson lId = do
]
) lst
getJsonZip :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
getJsonZip lId = do
lst <- getNgramsList lId
let nlz = NgramsListZIP { _nlz_nl = lst, _nlz_list_id = lId}
pure $ addHeader (concat [ "attachment; filename="
, nlzFileName nlz
, ".zip"
]
) nlz
getCsv :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
......
......@@ -20,13 +20,12 @@ import Data.List qualified as List
import Data.Map.Strict (fromList)
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Data.Validity
import Gargantext.API.Ngrams (getNgramsTableMap)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListType)
import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap )
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude
......
......@@ -20,20 +20,19 @@ module Gargantext.API.Ngrams.Types where
import Codec.Serialise (Serialise())
import Control.Category ((>>>))
import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
import Control.Monad.State
import Data.Aeson hiding ((.=))
import Data.Aeson.TH (deriveJSON)
import Data.Foldable
import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone))
import Data.Csv qualified as Csv
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new)
import Data.Set qualified as Set
import Data.String (IsString(..))
import Data.Swagger hiding (version, patch)
import Data.Text (pack, strip)
import Data.Validity
import Data.Swagger ( NamedSchema(NamedSchema), declareSchemaRef, genericDeclareNamedSchema, SwaggerType(SwaggerObject), ToParamSchema, ToSchema(..), HasProperties(properties), HasRequired(required), HasType(type_) )
import Data.Text qualified as T
import Data.Validity ( Validity(..) )
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import Gargantext.Core.Text (size)
......@@ -42,10 +41,12 @@ import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Servant hiding (Patch)
import Gargantext.Utils.Servant (CSV, ZIP)
import Gargantext.Utils.Zip (zipContentsPure)
import Servant ( FromHttpApiData(parseUrlPiece), ToHttpApiData(toUrlPiece), Required, Strict, QueryParam', MimeRender(.. ))
import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......@@ -79,7 +80,7 @@ instance FromHttpApiData TabType where
parseUrlPiece _ = Left "Unexpected value of TabType"
instance ToHttpApiData TabType where
toUrlPiece = pack . show
toUrlPiece = T.pack . show
instance ToParamSchema TabType
instance ToJSON TabType
instance FromJSON TabType
......@@ -128,9 +129,9 @@ instance IsHashable NgramsTerm where
instance Monoid NgramsTerm where
mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ T.strip t
instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s
fromString s = NgramsTerm $ T.pack s
data RootParent = RootParent
......@@ -266,7 +267,7 @@ instance FromHttpApiData OrderBy
parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToHttpApiData OrderBy where
toUrlPiece = pack . show
toUrlPiece = T.pack . show
instance ToParamSchema OrderBy
instance FromJSON OrderBy
......@@ -286,6 +287,27 @@ data NgramsSearchQuery = NgramsSearchQuery
------------------------------------------------------------------------
type NgramsTableMap = Map NgramsTerm NgramsRepoElement
-- CSV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where
-- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
where
encOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
, encQuoting = QuoteNone }
fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
namedRecord [ "status" Csv..= toText _nre_list
, "label" Csv..= term
, "forms" Csv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)]
toText :: ListType -> Text
toText CandidateTerm = "candidate"
toText MapTerm = "map"
toText StopTerm = "stop"
------------------------------------------------------------------------
-- On the Client side:
--data Action = InGroup NgramsId NgramsId
......@@ -763,6 +785,22 @@ instance ToSchema UpdateTableNgramsCharts where
------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
-- | Same as NgramsList, but wraps node_id so that the inner .json file can have proper name
data NgramsListZIP =
NgramsListZIP { _nlz_nl :: NgramsList
, _nlz_list_id :: ListId } deriving (Generic)
instance ToSchema NgramsListZIP where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nlz_")
nlzFileName :: NgramsListZIP -> Text
nlzFileName (NgramsListZIP { .. }) = "GarganText_NgramsList-" <> show _nlz_list_id <> ".json"
instance MimeRender ZIP NgramsListZIP where
mimeRender _ nlz@(NgramsListZIP { .. }) =
zipContentsPure (T.unpack $ nlzFileName nlz) (encode _nlz_nl)
--
-- Serialise instances
--
......
......@@ -12,28 +12,29 @@ module Gargantext.API.Node.Document.Export
where
import Control.Lens (view)
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByName)
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Version (showVersion)
import Gargantext.API.Node.Document.Export.Types
import Gargantext.API.Prelude (GargNoServer, GargServer)
import Gargantext.Core (toDBid)
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.User
import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id)
import Gargantext.Prelude
import Servant
import qualified Data.ByteString.Lazy.Char8 as BSC
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Paths_gargantext as PG -- cabal magic build module
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant ( addHeader, (:<|>)((:<|>)), Header, Headers(getResponse) )
api :: NodeId
-- ^ The ID of the target user
-> DocId
-> GargServer API
api userNodeId dId = getDocumentsJSON userNodeId dId
:<|> getDocumentsJSONZip userNodeId dId
:<|> getDocumentsCSV userNodeId dId
--------------------------------------------------
......@@ -47,11 +48,12 @@ getDocumentsJSON nodeUserId pId = do
mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-"
, T.pack $ show pId
, ".json"])
DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
pure $ addHeader (T.concat [ "attachment; filename="
, "GarganText_DocsList-"
, T.pack (show pId)
, ".json" ]) dexp
where
mapFacetDoc uId (FacetDoc { .. }) =
Document { _d_document =
......@@ -71,6 +73,18 @@ getDocumentsJSON nodeUserId pId = do
, _ng_hash = "" }
, _d_hash = ""}
getDocumentsJSONZip :: NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
getDocumentsJSONZip userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
let dexp = getResponse dJSON
let dexpz = DocumentExportZIP { _dez_dexp = dexp, _dez_doc_id = pId }
pure $ addHeader (T.concat [ "attachment; filename="
, dezFileName dexpz
, ".zip" ]) dexpz
getDocumentsCSV :: NodeId
-- ^ The Node ID of the target user
-> DocId
......
......@@ -13,19 +13,21 @@ Portability : POSIX
module Gargantext.API.Node.Document.Export.Types where
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
import Data.Swagger
--import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Gargantext.Core.Types
import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Types ( Node, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node (DocId)
import Gargantext.Database.Schema.Node (NodePoly(..))
--import Gargantext.Utils.Servant (CSV)
import Gargantext.Utils.Servant (ZIP)
import Gargantext.Utils.Zip (zipContentsPure)
import Protolude
--import Protolude.Partial (read)
import Servant
import Servant ((:>), (:<|>), Get, Header, Headers(..), JSON, MimeRender(..), PlainText, Summary)
-- | Document Export
......@@ -34,6 +36,12 @@ data DocumentExport =
, _de_garg_version :: Text
} deriving (Generic)
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
data DocumentExportZIP =
DocumentExportZIP { _dez_dexp :: DocumentExport
, _dez_doc_id :: DocId } deriving (Generic)
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: Ngrams
......@@ -71,6 +79,9 @@ type Hash = Text
instance ToSchema DocumentExport where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_de_")
instance ToSchema DocumentExportZIP where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_dez_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
......@@ -81,6 +92,9 @@ instance ToSchema Ngrams where
instance ToParamSchema DocumentExport where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema DocumentExportZIP where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
......@@ -90,10 +104,25 @@ instance ToParamSchema Ngrams where
type API = Summary "Document Export"
:> "export"
:> ( "json"
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:<|> "json.zip"
:> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP)
:<|> "csv"
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text)) -- [Document])
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text) )
$(deriveJSON (unPrefix "_ng_") ''Ngrams)
$(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_de_") ''DocumentExport)
------
-- Needs to be here because of deriveJSON TH above
dezFileName :: DocumentExportZIP -> Text
dezFileName (DocumentExportZIP { .. }) = "GarganText_DocsList-" <> show _dez_doc_id <> ".json"
instance MimeRender ZIP DocumentExportZIP where
mimeRender _ dexpz@(DocumentExportZIP { .. }) =
zipContentsPure (T.unpack $ dezFileName dexpz) (encode _dez_dexp)
......@@ -10,18 +10,16 @@ Portability : POSIX
module Gargantext.Utils.Servant where
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Csv (defaultEncodeOptions, encodeByNameWith, encodeDefaultOrderedByName, header, namedRecord, (.=), DefaultOrdered, EncodeOptions(..), NamedRecord, Quoting(QuoteNone), ToNamedRecord)
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Gargantext.API.Ngrams.Types (mSetToList, NgramsRepoElement(..), NgramsTableMap, NgramsTerm(..), unNgramsTerm)
import Gargantext.Core.Types.Main (ListType(..))
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByName, DefaultOrdered, ToNamedRecord)
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Network.HTTP.Media ((//), (/:))
import qualified Prelude
import Prelude qualified
import Protolude
import Protolude.Partial (read)
import Servant
import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) )
data CSV = CSV
......@@ -34,25 +32,6 @@ instance (DefaultOrdered a, ToNamedRecord a) => MimeRender CSV [a] where
instance MimeRender CSV T.Text where
mimeRender _ = BSC.fromStrict . TE.encodeUtf8
-- CSV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where
-- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
where
encOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
, encQuoting = QuoteNone }
fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
namedRecord [ "status" .= toText _nre_list
, "label" .= term
, "forms" .= (T.intercalate "|&|" $ unNgramsTerm <$> mSetToList _nre_children)]
toText :: ListType -> Text
toText CandidateTerm = "candidate"
toText MapTerm = "map"
toText StopTerm = "stop"
instance Read a => MimeUnrender CSV a where
mimeUnrender _ bs = case BSC.take len bs of
"text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs
......@@ -76,3 +55,18 @@ instance MimeRender Markdown T.Text where
instance MimeUnrender Markdown T.Text where
mimeUnrender _ = Right . TE.decodeUtf8 . BSC.toStrict
---------------------------
data ZIP = ZIP
instance Accept ZIP where
contentType _ = "application" // "zip"
instance MimeRender ZIP BSC.ByteString where
mimeRender _ = identity
instance MimeUnrender ZIP BSC.ByteString where
mimeUnrender _ = Right . identity
......@@ -15,18 +15,38 @@ Utilities for handling zip files
module Gargantext.Utils.Zip where
import "zip" Codec.Archive.Zip (withArchive, ZipArchive)
-- import Control.Monad.Base (liftBase)
import "zip" Codec.Archive.Zip (addEntry, createArchive, mkEntrySelector, withArchive, CompressionMethod(BZip2), ZipArchive)
import "zip-archive" Codec.Archive.Zip qualified as ZArch
import Control.Monad.Base (MonadBase, liftBase)
import Data.ByteString qualified as BS
import Data.ByteString.Lazy.Char8 qualified as BSC
import Protolude
import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile)
-- | Take a zip file (in for of a ByteString) and work on its contents (using the ZipArchive monad)
withZipFileBS :: MonadIO m => BS.ByteString -> ZipArchive a -> m a
withZipFileBS bs actions =
liftIO $ bracket (emptySystemTempFile "parsed-zip")
(\path -> removeFile path) $
\path -> do
BS.writeFile path bs
withArchive path actions
withZipFileBS bs actions = liftIO $
bracket (emptySystemTempFile "parsed-zip")
removeFile
(\path -> do
BS.writeFile path bs
withArchive path actions)
-- | Zip ByteString contents and return the ZIP file as ByteString
zipContents :: MonadBase IO m => FilePath -> BS.ByteString -> m BS.ByteString
zipContents fpath bsContents = liftBase $
bracket (emptySystemTempFile "zip-contents")
removeFile
(\path -> do
s <- mkEntrySelector fpath
createArchive path (addEntry BZip2 bsContents s)
BS.readFile path)
-- | Same as zipContents above, but pure (in-memory)
zipContentsPure :: FilePath -> BSC.ByteString -> BSC.ByteString
zipContentsPure fpath bscContents = ZArch.fromArchive (ZArch.addEntryToArchive e ZArch.emptyArchive)
where
e = ZArch.toEntry fpath 0 bscContents
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment