Commit 842b3d36 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Support exporting docs and ngrams (but not importing them yet)

parent 98708c2e
......@@ -698,6 +698,7 @@ makeLenses ''Versioned
instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
declareNamedSchema = wellNamedSchema "_v_"
instance NFData a => NFData (Versioned a) where
instance Serialise a => Serialise (Versioned a) where
------------------------------------------------------------------------
type Count = Int
......
......@@ -9,6 +9,10 @@ Portability : POSIX
-}
module Gargantext.API.Node.Document.Export
( documentExportAPI
-- * Internals
, get_document_json
)
where
import Control.Lens (view)
......@@ -20,7 +24,7 @@ import Data.Time.Clock.System (getSystemTime, systemSeconds)
import Data.Time.LocalTime (getCurrentTimeZone, TimeZone (timeZoneMinutes))
import Data.Version (showVersion)
import Gargantext.API.Node.Document.Export.Types
import Gargantext.API.Prelude (GargNoServer, IsGargServer)
import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Named.Document qualified as Named
import Gargantext.Core (toDBid)
import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
......@@ -46,21 +50,26 @@ documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExpor
--------------------------------------------------
-- | Hashes are ordered by Set
getDocumentsJSON :: NodeId
getDocumentsJSON :: IsGargServer env err m
=> NodeId
-- ^ The ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExport)
-> m (Headers '[Header "Content-Disposition" T.Text] DocumentExport)
getDocumentsJSON nodeUserId pId = do
uId <- view node_user_id <$> getNodeUser nodeUserId
mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
dexp <- get_document_json nodeUserId pId
pure $ addHeader (T.concat [ "attachment; filename="
, "GarganText_DocsList-"
, T.pack (show pId)
, ".json" ]) dexp
get_document_json :: IsGargServer err env m => NodeId -> DocId -> m DocumentExport
get_document_json nodeUserId pId = do
uId <- view node_user_id <$> getNodeUser nodeUserId
mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
pure DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
where
mapFacetDoc uId (FacetDoc { .. }) =
Document { _d_document =
......@@ -80,10 +89,11 @@ getDocumentsJSON nodeUserId pId = do
, _ng_hash = "" }
, _d_hash = ""}
getDocumentsJSONZip :: NodeId
getDocumentsJSONZip :: IsGargServer env err m
=> NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
-> m (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
getDocumentsJSONZip userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
systime <- liftBase getSystemTime
......@@ -98,10 +108,11 @@ getDocumentsJSONZip userNodeId pId = do
, dezFileName dexpz
, ".zip" ]) dexpz
getDocumentsTSV :: NodeId
getDocumentsTSV :: IsGargServer err env m
=> NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
-> m (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
getDocumentsTSV userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
let DocumentExport { _de_documents } = getResponse dJSON
......
......@@ -13,12 +13,13 @@ Portability : POSIX
module Gargantext.API.Node.Document.Export.Types where
import Codec.Serialise.Class hiding (encode)
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Text qualified as T
import Gargantext.Core.Types ( Node, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
......@@ -36,6 +37,8 @@ data DocumentExport =
, _de_garg_version :: Text
} deriving (Generic)
instance Serialise DocumentExport where
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
data DocumentExportZIP =
DocumentExportZIP { _dez_dexp :: DocumentExport
......@@ -49,6 +52,7 @@ data Document =
, _d_hash :: Hash
} deriving (Generic)
instance Serialise Document where
--instance Read Document where
-- read "" = panic "not implemented"
instance DefaultOrdered Document where
......@@ -102,6 +106,8 @@ instance ToParamSchema Document where
instance ToParamSchema Ngrams where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance Serialise Ngrams where
$(deriveJSON (unPrefix "_ng_") ''Ngrams)
$(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_de_") ''DocumentExport)
......
......@@ -26,21 +26,39 @@ import Gargantext.API.Admin.Auth
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..))
import Gargantext.API.Auth.PolicyCheck (remoteExportChecks)
import Gargantext.API.Errors.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Document.Export.Types (DocumentExport)
import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Client (remoteImportClient)
import Gargantext.API.Routes.Named.Remote qualified as Named
import Gargantext.Core.Config
import Gargantext.Core (lookupDBid)
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (IsDBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..))
import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode)
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Orphans ()
import GHC.Generics (Generic)
import Prelude
import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError)
import Servant.Server.Generic (AsServerT)
import Gargantext.Core.Types.Main
import Gargantext.API.Node.Document.Export (get_document_json)
data ExportableNode =
ExportableNode {
_en_node :: Node JSON.Value
-- | If this node is a \"docs\" node, remotely export also
-- all the associated documents.
, _en_associated_docs :: Maybe DocumentExport
-- | If this node is a \"terms\" node, remotely export also
-- all the associated ngrams
, _en_associated_ngrams :: Maybe NgramsList
} deriving Generic
instance Serialise ExportableNode where
remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m)
=> AuthenticatedUser
......@@ -48,11 +66,11 @@ remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m)
remoteAPI authenticatedUser = Named.RemoteAPI $
Named.RemoteAPI'
{ remoteExportEp = \payload@Named.RemoteExportRequest{..} mgr ->
withPolicy authenticatedUser (remoteExportChecks _rer_node_id) (remoteExportHandler payload) mgr
withPolicy authenticatedUser (remoteExportChecks _rer_node_id) (remoteExportHandler authenticatedUser payload) mgr
, remoteImportEp = remoteImportHandler authenticatedUser
}
type ExpectedPayload = Tree (Node JSON.Value)
type ExpectedPayload = Tree ExportableNode
remoteImportHandler :: forall err env m.
(HasNodeError err, HasBackendInternalError err, IsDBCmd env err m, MonadIO m)
......@@ -74,14 +92,14 @@ remoteImportHandler loggedInUser c = do
foldlM (insertTrees (Just rootNode)) [rootNode] xs
where
insertNode :: Maybe NodeId -> Node JSON.Value -> m NodeId
insertNode mb_parent x = case lookupDBid $ _node_typename x of
insertNode :: Maybe NodeId -> ExportableNode -> m NodeId
insertNode mb_parent (ExportableNode x _mb_docs _mb_terms) = case lookupDBid $ _node_typename x of
Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
Just ty -> do
new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
pure new_node
insertTrees :: Maybe NodeId -> [NodeId] -> Tree (Node JSON.Value) -> m [NodeId]
insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId]
insertTrees currentParent !acc (TreeN x xs) = do
childrenRoot <- insertNode currentParent x
(`mappend` acc) <$> foldlM (insertTrees (Just childrenRoot)) [childrenRoot] xs
......@@ -91,15 +109,37 @@ remoteImportHandler loggedInUser c = do
remoteExportHandler :: ( MonadIO m, Safe.MonadCatch m
, IsGargServer err env m
)
=> Named.RemoteExportRequest
=> AuthenticatedUser
-> Named.RemoteExportRequest
-> m [NodeId]
remoteExportHandler Named.RemoteExportRequest{..} = do
remoteExportHandler loggedInUser Named.RemoteExportRequest{..} = do
mgr <- view gargHttpManager
nodes <- getNodes _rer_node_id
checkNodesTypeAllowed nodes
liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder nodes)) (mkClientEnv mgr _rer_instance_url) streamDecode)
exportable <- makeExportable (_auth_node_id loggedInUser) nodes
liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder exportable)) (mkClientEnv mgr _rer_instance_url) streamDecode)
`Safe.catch` \(e :: BackendInternalError) -> throwError $ _BackendInternalError # e
makeExportable :: IsGargServer err env m
=> NodeId
-> Tree (Node JSON.Value)
-> m (Tree ExportableNode)
makeExportable userNodeId (TreeN x xs)
| Just nty <- lookupDBid (_node_typename x)
= do
mb_docs <- case nty of
NodeDocument -> Just <$> get_document_json userNodeId (_node_id x)
_ -> pure Nothing
mb_ngrams <- case nty of
NodeList -> Just <$> getNgramsList (_node_id x)
_ -> pure Nothing
let exportableRoot = ExportableNode x mb_docs mb_ngrams
children <- mapM (makeExportable userNodeId) xs
pure $ TreeN exportableRoot children
| otherwise
= throwError $ _BackendInternalError
# InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m ()
checkNodesTypeAllowed (TreeN r xs) = do
checkNodeTypeAllowed r
......@@ -118,7 +158,7 @@ checkNodeTypeAllowed n
exportableNodeTypes :: [NodeType]
exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo ]
streamEncoder :: (MonadIO m, Serialise a) => a -> ConduitT () Named.RemoteBinaryData m ()
streamEncoder :: MonadIO m => ExpectedPayload -> ConduitT () Named.RemoteBinaryData m ()
streamEncoder = CL.sourceList . map Named.RemoteBinaryData . BL.toChunks . serialise
-- | Returns a conduit which can be used to decode
......
......@@ -88,6 +88,7 @@ data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
deriving (Generic, Show, Eq, Ord)
instance Hashable Ngrams
instance Serialise Ngrams where
makeLenses ''Ngrams
instance PGS.ToRow Ngrams where
......
......@@ -17,6 +17,7 @@ import Gargantext.Prelude hiding (ByteString)
import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Codec.Serialise.Class hiding (decode)
------------------------------------------------------------------------
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
......@@ -41,6 +42,7 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T
deriving (Show, Generic)
instance NFData HyperdataDocument
instance Serialise HyperdataDocument
instance HasText HyperdataDocument
where
......
......@@ -16,13 +16,14 @@ Portability : POSIX
module Gargantext.Database.Schema.Node where
import Codec.Serialise
import Codec.CBOR.JSON qualified as CBOR
import Codec.Serialise
import Control.Lens hiding (elements, (&))
import Data.Aeson qualified as JSON
import Data.Aeson (ToJSON, toJSON, parseJSON, FromJSON)
import Gargantext.Database.Schema.Prelude
import Gargantext.Prelude (NFData(..))
import Prelude hiding (null, id, map, sum)
import Data.Aeson.Types (parseEither)
------------------------------------------------------------------------
-- Main polymorphic Node definition
......@@ -57,7 +58,9 @@ instance ( Serialise i
, Serialise p
, Serialise n
, Serialise d
) => Serialise (NodePoly i h t u p n d JSON.Value) where
, ToJSON json
, FromJSON json
) => Serialise (NodePoly i h t u p n d json) where
encode Node{..} =
encode _node_id <>
encode _node_hash_id <>
......@@ -66,7 +69,7 @@ instance ( Serialise i
encode _node_parent_id <>
encode _node_name <>
encode _node_date <>
CBOR.encodeValue _node_hyperdata
CBOR.encodeValue (toJSON _node_hyperdata)
decode = do
_node_id <- decode
_node_hash_id <- decode
......@@ -75,8 +78,10 @@ instance ( Serialise i
_node_parent_id <- decode
_node_name <- decode
_node_date <- decode
_node_hyperdata <- CBOR.decodeValue False
pure Node{..}
mb_node_hyperdata <- parseEither parseJSON <$> CBOR.decodeValue False
case mb_node_hyperdata of
Left err -> fail err
Right _node_hyperdata -> pure Node{..}
------------------------------------------------------------------------
-- Automatic instances derivation
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment