Commit 842b3d36 authored by Alfredo Di Napoli

Support exporting docs and ngrams (but not importing them yet)

parent 98708c2e
@@ -698,6 +698,7 @@ makeLenses ''Versioned
 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
   declareNamedSchema = wellNamedSchema "_v_"
 instance NFData a => NFData (Versioned a) where
+instance Serialise a => Serialise (Versioned a) where
 ------------------------------------------------------------------------
 type Count = Int
...
@@ -9,6 +9,10 @@ Portability : POSIX
 -}
 module Gargantext.API.Node.Document.Export
+  ( documentExportAPI
+  -- * Internals
+  , get_document_json
+  )
   where
 import Control.Lens (view)
@@ -20,7 +24,7 @@ import Data.Time.Clock.System (getSystemTime, systemSeconds)
 import Data.Time.LocalTime (getCurrentTimeZone, TimeZone (timeZoneMinutes))
 import Data.Version (showVersion)
 import Gargantext.API.Node.Document.Export.Types
-import Gargantext.API.Prelude (GargNoServer, IsGargServer)
+import Gargantext.API.Prelude (IsGargServer)
 import Gargantext.API.Routes.Named.Document qualified as Named
 import Gargantext.Core (toDBid)
 import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
@@ -46,21 +50,26 @@ documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExpor
 --------------------------------------------------
 -- | Hashes are ordered by Set
-getDocumentsJSON :: NodeId
+getDocumentsJSON :: IsGargServer env err m
+                 => NodeId
                  -- ^ The ID of the target user
                  -> DocId
-                 -> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExport)
+                 -> m (Headers '[Header "Content-Disposition" T.Text] DocumentExport)
 getDocumentsJSON nodeUserId pId = do
-  uId <- view node_user_id <$> getNodeUser nodeUserId
-  mcId <- getClosestParentIdByType pId NodeCorpus
-  let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
-  docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
-  let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
-                            , _de_garg_version = T.pack $ showVersion PG.version }
+  dexp <- get_document_json nodeUserId pId
   pure $ addHeader (T.concat [ "attachment; filename="
                              , "GarganText_DocsList-"
                              , T.pack (show pId)
                              , ".json" ]) dexp
+
+get_document_json :: IsGargServer err env m => NodeId -> DocId -> m DocumentExport
+get_document_json nodeUserId pId = do
+  uId <- view node_user_id <$> getNodeUser nodeUserId
+  mcId <- getClosestParentIdByType pId NodeCorpus
+  let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
+  docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
+  pure DocumentExport { _de_documents = mapFacetDoc uId <$> docs
+                      , _de_garg_version = T.pack $ showVersion PG.version }
   where
     mapFacetDoc uId (FacetDoc { .. }) =
       Document { _d_document =
@@ -80,10 +89,11 @@ getDocumentsJSON nodeUserId pId = do
                            , _ng_hash = "" }
                , _d_hash = "" }
-getDocumentsJSONZip :: NodeId
+getDocumentsJSONZip :: IsGargServer env err m
+                    => NodeId
                     -- ^ The Node ID of the target user
                     -> DocId
-                    -> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
+                    -> m (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
 getDocumentsJSONZip userNodeId pId = do
   dJSON <- getDocumentsJSON userNodeId pId
   systime <- liftBase getSystemTime
@@ -98,10 +108,11 @@ getDocumentsJSONZip userNodeId pId = do
                  , dezFileName dexpz
                  , ".zip" ]) dexpz
-getDocumentsTSV :: NodeId
+getDocumentsTSV :: IsGargServer err env m
+                => NodeId
                 -- ^ The Node ID of the target user
                 -> DocId
-                -> GargNoServer (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
+                -> m (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
 getDocumentsTSV userNodeId pId = do
   dJSON <- getDocumentsJSON userNodeId pId
   let DocumentExport { _de_documents } = getResponse dJSON
...
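An illustrative sketch (not part of this commit): the refactor above separates payload construction (get_document_json) from HTTP concerns, with getDocumentsJSON only wrapping the result in a Content-Disposition header. The module name, function name and Text payload below are made up; only the Servant addHeader call mirrors what the handler does.

{-# LANGUAGE DataKinds           #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings   #-}
module AttachmentSketch where

import Data.Text (Text)
import Data.Text qualified as T
import Servant (Header, Headers, addHeader)

-- Wrap any payload in the same "attachment; filename=..." header that
-- getDocumentsJSON adds around the DocumentExport built by
-- get_document_json. The payload type and file name are illustrative.
asJsonAttachment :: Int -> a -> Headers '[Header "Content-Disposition" Text] a
asJsonAttachment docId payload =
  addHeader (T.concat [ "attachment; filename="
                      , "GarganText_DocsList-"
                      , T.pack (show docId)
                      , ".json" ]) payload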
@@ -13,12 +13,13 @@ Portability : POSIX
 module Gargantext.API.Node.Document.Export.Types where
+import Codec.Serialise.Class hiding (encode)
 import Data.Aeson (encode)
 import Data.Aeson.TH (deriveJSON)
 import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
 import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
+import Data.Text qualified as T
 import Data.Text.Encoding qualified as TE
-import Data.Text qualified as T
 import Gargantext.Core.Types ( Node, TODO )
 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
 import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
@@ -36,6 +37,8 @@ data DocumentExport =
                  , _de_garg_version :: Text
                  } deriving (Generic)
+instance Serialise DocumentExport where
+
 -- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
 data DocumentExportZIP =
   DocumentExportZIP { _dez_dexp :: DocumentExport
@@ -49,6 +52,7 @@ data Document =
            , _d_hash :: Hash
            } deriving (Generic)
+instance Serialise Document where
 --instance Read Document where
 --  read "" = panic "not implemented"
 instance DefaultOrdered Document where
@@ -102,6 +106,8 @@ instance ToParamSchema Document where
 instance ToParamSchema Ngrams where
   toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
+instance Serialise Ngrams where
+
 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
 $(deriveJSON (unPrefix "_d_") ''Document)
 $(deriveJSON (unPrefix "_de_") ''DocumentExport)
...
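An illustrative sketch (not part of this commit): the empty instance bodies above (instance Serialise DocumentExport where, and so on) rely on the Generic-based default methods of Codec.Serialise, so no hand-written CBOR encoder is needed. The module and the Demo record below are made up stand-ins showing the same pattern.

{-# LANGUAGE DeriveGeneric       #-}
{-# LANGUAGE ImportQualifiedPost #-}
module SerialiseSketch where

import Codec.Serialise (Serialise, deserialise, serialise)
import GHC.Generics (Generic)

-- A throwaway stand-in for DocumentExport: any record with a Generic
-- instance gets CBOR encode/decode for free from an empty instance.
data Demo = Demo { demoName :: String, demoCount :: Int }
  deriving (Show, Eq, Generic)

instance Serialise Demo

-- Round-trips through the same CBOR wire format the export uses.
main :: IO ()
main = do
  let d = Demo "ngrams" 3
  print (deserialise (serialise d) == d)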
@@ -26,21 +26,39 @@ import Gargantext.API.Admin.Auth
 import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..))
 import Gargantext.API.Auth.PolicyCheck (remoteExportChecks)
 import Gargantext.API.Errors.Types
+import Gargantext.API.Ngrams.Prelude (getNgramsList)
+import Gargantext.API.Ngrams.Types (NgramsList)
+import Gargantext.API.Node.Document.Export.Types (DocumentExport)
 import Gargantext.API.Prelude (IsGargServer)
 import Gargantext.API.Routes.Client (remoteImportClient)
 import Gargantext.API.Routes.Named.Remote qualified as Named
 import Gargantext.Core.Config
 import Gargantext.Core (lookupDBid)
+import Gargantext.Core.Types.Main
 import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Prelude (IsDBCmd)
 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..))
 import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode)
 import Gargantext.Database.Schema.Node (NodePoly(..))
 import Gargantext.Orphans ()
+import GHC.Generics (Generic)
 import Prelude
 import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError)
 import Servant.Server.Generic (AsServerT)
-import Gargantext.Core.Types.Main
+import Gargantext.API.Node.Document.Export (get_document_json)
+
+data ExportableNode =
+  ExportableNode {
+    _en_node :: Node JSON.Value
+    -- | If this node is a \"docs\" node, remotely export also
+    -- all the associated documents.
+  , _en_associated_docs :: Maybe DocumentExport
+    -- | If this node is a \"terms\" node, remotely export also
+    -- all the associated ngrams
+  , _en_associated_ngrams :: Maybe NgramsList
+  } deriving Generic
+
+instance Serialise ExportableNode where
+
 remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m)
           => AuthenticatedUser
@@ -48,11 +66,11 @@ remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m)
 remoteAPI authenticatedUser = Named.RemoteAPI $
   Named.RemoteAPI'
     { remoteExportEp = \payload@Named.RemoteExportRequest{..} mgr ->
-        withPolicy authenticatedUser (remoteExportChecks _rer_node_id) (remoteExportHandler payload) mgr
+        withPolicy authenticatedUser (remoteExportChecks _rer_node_id) (remoteExportHandler authenticatedUser payload) mgr
     , remoteImportEp = remoteImportHandler authenticatedUser
     }
-type ExpectedPayload = Tree (Node JSON.Value)
+type ExpectedPayload = Tree ExportableNode
 remoteImportHandler :: forall err env m.
                        (HasNodeError err, HasBackendInternalError err, IsDBCmd env err m, MonadIO m)
@@ -74,14 +92,14 @@ remoteImportHandler loggedInUser c = do
       foldlM (insertTrees (Just rootNode)) [rootNode] xs
   where
-    insertNode :: Maybe NodeId -> Node JSON.Value -> m NodeId
-    insertNode mb_parent x = case lookupDBid $ _node_typename x of
+    insertNode :: Maybe NodeId -> ExportableNode -> m NodeId
+    insertNode mb_parent (ExportableNode x _mb_docs _mb_terms) = case lookupDBid $ _node_typename x of
       Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
       Just ty -> do
        new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
        pure new_node
-    insertTrees :: Maybe NodeId -> [NodeId] -> Tree (Node JSON.Value) -> m [NodeId]
+    insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId]
     insertTrees currentParent !acc (TreeN x xs) = do
       childrenRoot <- insertNode currentParent x
       (`mappend` acc) <$> foldlM (insertTrees (Just childrenRoot)) [childrenRoot] xs
@@ -91,15 +109,37 @@ remoteImportHandler loggedInUser c = do
 remoteExportHandler :: ( MonadIO m, Safe.MonadCatch m
                        , IsGargServer err env m
                        )
-                    => Named.RemoteExportRequest
+                    => AuthenticatedUser
+                    -> Named.RemoteExportRequest
                     -> m [NodeId]
-remoteExportHandler Named.RemoteExportRequest{..} = do
+remoteExportHandler loggedInUser Named.RemoteExportRequest{..} = do
   mgr <- view gargHttpManager
   nodes <- getNodes _rer_node_id
   checkNodesTypeAllowed nodes
-  liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder nodes)) (mkClientEnv mgr _rer_instance_url) streamDecode)
+  exportable <- makeExportable (_auth_node_id loggedInUser) nodes
+  liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder exportable)) (mkClientEnv mgr _rer_instance_url) streamDecode)
     `Safe.catch` \(e :: BackendInternalError) -> throwError $ _BackendInternalError # e
+
+makeExportable :: IsGargServer err env m
+               => NodeId
+               -> Tree (Node JSON.Value)
+               -> m (Tree ExportableNode)
+makeExportable userNodeId (TreeN x xs)
+  | Just nty <- lookupDBid (_node_typename x)
+  = do
+      mb_docs <- case nty of
+        NodeDocument -> Just <$> get_document_json userNodeId (_node_id x)
+        _            -> pure Nothing
+      mb_ngrams <- case nty of
+        NodeList -> Just <$> getNgramsList (_node_id x)
+        _        -> pure Nothing
+      let exportableRoot = ExportableNode x mb_docs mb_ngrams
+      children <- mapM (makeExportable userNodeId) xs
+      pure $ TreeN exportableRoot children
+  | otherwise
+  = throwError $ _BackendInternalError
+      # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
+
 checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m ()
 checkNodesTypeAllowed (TreeN r xs) = do
   checkNodeTypeAllowed r
@@ -118,7 +158,7 @@ checkNodeTypeAllowed n
 exportableNodeTypes :: [NodeType]
 exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo ]
-streamEncoder :: (MonadIO m, Serialise a) => a -> ConduitT () Named.RemoteBinaryData m ()
+streamEncoder :: MonadIO m => ExpectedPayload -> ConduitT () Named.RemoteBinaryData m ()
 streamEncoder = CL.sourceList . map Named.RemoteBinaryData . BL.toChunks . serialise
 -- | Returns a conduit which can be used to decode
...
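An illustrative sketch (not part of this commit): streamEncoder serialises the whole ExpectedPayload to a lazy CBOR ByteString and streams its strict chunks through a conduit. The module name, chunkStream and the [Int] payload below are made up; only the conduit/serialise pattern mirrors the handler.

{-# LANGUAGE ImportQualifiedPost #-}
module StreamSketch where

import Codec.Serialise (DeserialiseFailure, deserialiseOrFail, serialise)
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL
import Data.Conduit (ConduitT, runConduit, (.|))
import Data.Conduit.List qualified as CL

-- Same shape as streamEncoder above, minus the RemoteBinaryData wrapper:
-- serialise to CBOR, then emit the lazy ByteString's strict chunks.
chunkStream :: Monad m => [Int] -> ConduitT () BS.ByteString m ()
chunkStream = CL.sourceList . BL.toChunks . serialise

-- The receiving side gathers the chunks and decodes the payload back.
main :: IO ()
main = do
  chunks <- runConduit (chunkStream [1 .. 10] .| CL.consume)
  print (deserialiseOrFail (BL.fromChunks chunks)
           :: Either DeserialiseFailure [Int])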
@@ -88,6 +88,7 @@ data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
   deriving (Generic, Show, Eq, Ord)
 instance Hashable Ngrams
+instance Serialise Ngrams where
 makeLenses ''Ngrams
 instance PGS.ToRow Ngrams where
...
@@ -17,6 +17,7 @@ import Gargantext.Prelude hiding (ByteString)
 import Gargantext.Core.Text (HasText(..))
 import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
 import Gargantext.Database.Admin.Types.Hyperdata.Prelude
+import Codec.Serialise.Class hiding (decode)
 ------------------------------------------------------------------------
 data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
@@ -41,6 +42,7 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T
   deriving (Show, Generic)
 instance NFData HyperdataDocument
+instance Serialise HyperdataDocument
 instance HasText HyperdataDocument
   where
...
@@ -16,13 +16,14 @@ Portability : POSIX
 module Gargantext.Database.Schema.Node where
-import Codec.Serialise
 import Codec.CBOR.JSON qualified as CBOR
+import Codec.Serialise
 import Control.Lens hiding (elements, (&))
-import Data.Aeson qualified as JSON
+import Data.Aeson (ToJSON, toJSON, parseJSON, FromJSON)
 import Gargantext.Database.Schema.Prelude
 import Gargantext.Prelude (NFData(..))
 import Prelude hiding (null, id, map, sum)
+import Data.Aeson.Types (parseEither)
 ------------------------------------------------------------------------
 -- Main polymorphic Node definition
@@ -57,7 +58,9 @@ instance ( Serialise i
          , Serialise p
          , Serialise n
          , Serialise d
-         ) => Serialise (NodePoly i h t u p n d JSON.Value) where
+         , ToJSON json
+         , FromJSON json
+         ) => Serialise (NodePoly i h t u p n d json) where
   encode Node{..} =
     encode _node_id <>
     encode _node_hash_id <>
@@ -66,7 +69,7 @@
     encode _node_parent_id <>
     encode _node_name <>
     encode _node_date <>
-    CBOR.encodeValue _node_hyperdata
+    CBOR.encodeValue (toJSON _node_hyperdata)
   decode = do
     _node_id <- decode
     _node_hash_id <- decode
@@ -75,8 +78,10 @@ instance ( Serialise i
     _node_parent_id <- decode
     _node_name <- decode
     _node_date <- decode
-    _node_hyperdata <- CBOR.decodeValue False
-    pure Node{..}
+    mb_node_hyperdata <- parseEither parseJSON <$> CBOR.decodeValue False
+    case mb_node_hyperdata of
+      Left err -> fail err
+      Right _node_hyperdata -> pure Node{..}
 ------------------------------------------------------------------------
 -- Automatic instances derivation
...
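An illustrative sketch (not part of this commit): with the generalised instance above, the hyperdata field is converted to an aeson Value (toJSON) before being written with Codec.CBOR.JSON, and parsed back out of a Value on decode. The module name and roundtrip function below are made up; the encode/decode calls are the same cborg-json primitives the instance uses.

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings   #-}
module CborJsonSketch where

import Codec.CBOR.JSON qualified as CBOR
import Codec.CBOR.Read qualified as CBOR.Read
import Codec.CBOR.Write qualified as CBOR.Write
import Data.Aeson (Value, object, (.=))

-- Encode a JSON Value to CBOR and decode it back, the same round trip
-- the NodePoly instance performs on its hyperdata field.
roundtrip :: Value -> Either String Value
roundtrip v =
  case CBOR.Read.deserialiseFromBytes (CBOR.decodeValue False)
         (CBOR.Write.toLazyByteString (CBOR.encodeValue v)) of
    Left err      -> Left (show err)
    Right (_, v') -> Right v'

main :: IO ()
main = print (roundtrip (object [ "title" .= ("hello" :: String) ]))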