{-# LANGUAGE BangPatterns #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} module Gargantext.API.Server.Named.Remote ( remoteExportAPI , remoteImportAPI ) where import Codec.Serialise import Conduit import Control.Exception.Safe qualified as Safe import Control.Exception (toException) import Control.Lens (view, (#), (^.)) import Control.Monad.Except (throwError) import Control.Monad (void, forM_) import Data.Aeson qualified as JSON import Data.Aeson.Types qualified as JS import Data.ByteString.Builder qualified as B import Data.ByteString.Lazy qualified as BL import Data.Conduit.Combinators qualified as C import Data.Conduit.List qualified as CL import Data.Foldable (for_, foldlM) import Data.List qualified as List import Data.List.Split qualified as Split import Data.Monoid import Data.String (IsString(..)) import Data.Text.Encoding qualified as TE import Data.Text qualified as T import Gargantext.API.Admin.Auth import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..)) import Gargantext.API.Auth.PolicyCheck (remoteExportChecks) import Gargantext.API.Errors.Types import Gargantext.API.Ngrams.Prelude (getNgramsList) import Gargantext.API.Ngrams.Types (NgramsList) import Gargantext.API.Node.Document.Export (get_document_json) import Gargantext.API.Node.Document.Export.Types import Gargantext.API.Prelude (IsGargServer) import Gargantext.API.Routes.Client (remoteImportClient) import Gargantext.API.Routes.Named.Remote qualified as Named import Gargantext.Core.Config import Gargantext.Core.Config.Types (f_write_url) import Gargantext.Core (lookupDBid) import Gargantext.Core.NLP (HasNLPServer) import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv (..)) import Gargantext.Core.Types.Main import Gargantext.Core.Worker.Jobs (sendJob) import Gargantext.Core.Worker.Jobs.Types qualified as Jobs import Gargantext.Database.Admin.Types.Hyperdata.Default (DefaultHyperdata(..)) import Gargantext.Database.Admin.Types.Hyperdata.Frame (HyperdataFrame(..)) import Gargantext.Database.Admin.Types.Node hiding (ERROR, WARNING, INFO) import Gargantext.Database.Prelude import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..)) import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode) import Gargantext.Database.Query.Table.Node qualified as DB import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Orphans () import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger) import GHC.Generics (Generic) import Network.HTTP.Client qualified as HTTP import Network.HTTP.Types.Header qualified as HTTP import Prelude import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError) import Servant.Server.Generic (AsServerT) data ExportableNode = EN_corpus (Node JSON.Value) | EN_graph (Node JSON.Value) | EN_phylo (Node JSON.Value) -- | If this node is a \"docs\" node, remotely export also -- all the associated documents. | EN_document (Node JSON.Value) DocumentExport -- | If this node is a \"terms\" node, remotely export also -- all the associated ngrams | EN_terms (Node JSON.Value) NgramsList -- | For notes nodes we don't have any node to import -- because all the details about the frame service -- would be different at the destination, and have -- to be recomputed from scratch. | EN_notes T.Text deriving Generic renderExportableNode :: ExportableNode -> T.Text renderExportableNode = \case EN_corpus{} -> "corpus node" EN_graph{} -> "graph node" EN_phylo{} -> "phylo node" EN_document{} -> "document node" EN_terms{} -> "terms node" EN_notes{} -> "nodes node" instance Serialise ExportableNode where remoteExportAPI :: (MonadIO m, IsGargServer env BackendInternalError m) => NodeId -> AuthenticatedUser -> Named.RemoteExportAPI (AsServerT m) remoteExportAPI nodeId authenticatedUser = Named.RemoteExportAPI { remoteExportEp = \payload mgr -> withPolicy authenticatedUser (remoteExportChecks nodeId) (remoteExportHandler nodeId authenticatedUser payload) mgr } remoteImportAPI :: (MonadIO m, IsGargServer env BackendInternalError m) => AuthenticatedUser -> Named.RemoteImportAPI (AsServerT m) remoteImportAPI authenticatedUser = Named.RemoteImportAPI { remoteImportEp = remoteImportHandler authenticatedUser } type ExpectedPayload = Tree ExportableNode remoteImportHandler :: forall err env m. ( HasNodeStoryEnv env err , HasNodeError err , HasBackendInternalError err , IsDBCmd env err m , HasNLPServer env , MonadLogger m , HasConfig env , HasManager env , MonadIO m) => AuthenticatedUser -> ConduitT () Named.RemoteBinaryData IO () -> m [NodeId] remoteImportHandler loggedInUser c = do chunks <- liftIO $ sourceToList $ c .| C.map (B.byteString . Named.getRemoteBinaryData) -- FIXME(adn): We have to find a way to deserialise this into a streaming fashion and -- attempt insertion one element of the list at the time. case deserialiseOrFail @ExpectedPayload (B.toLazyByteString $ mconcat chunks) of Left err -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "Deserialization error: " ++ show err) Right (TreeN x xs) -> do $(logLocM) INFO $ "Importing " <> renderExportableNode x -- NOTE(adn) By default, we append the imported node(s) to the user's -- private folder. privateFolderId <- _node_id <$> runDBQuery (getUserRootPrivateNode (_auth_user_id loggedInUser)) $(logLocM) INFO $ "Attaching " <> renderExportableNode x <> " to private folder " <> T.pack (show privateFolderId) -- Attempts to insert nodes a we go along. rootNode <- insertNode (Just privateFolderId) x nodes <- foldlM (insertTrees (Just rootNode)) [rootNode] xs $(logLocM) INFO $ "Successfully imported all the requested nodes." pure nodes where insertNode :: Maybe NodeId -> ExportableNode -> m NodeId insertNode mb_parent exported_node = case exported_node of EN_corpus x -> insertSimple mb_parent x EN_graph x -> insertSimple mb_parent x EN_phylo x -> insertSimple mb_parent x EN_notes noteAsMarkdown -> do case mb_parent of Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "No parent id found, I cannot attach this note.") Just parentId -> do $(logLocM) INFO $ "Found some markdown notes to import..." -- NOTE: Unfortunately we cannot rely on the settings that the hyperdata frame -- is sending us, because both the frame Id and the base URL would be different -- on the target instance. mgr <- view gargHttpManager cfg <- view hasConfig newHyperdataFrame <- importNote mgr noteAsMarkdown cfg -- TODO(adn) Import with the valid name. new_node <- runDBTx $ DB.insertNode Notes (Just "Imported note") (Just $ DefaultFrameCode newHyperdataFrame) parentId (_auth_user_id loggedInUser) pure new_node EN_document x docsList -> case lookupDBid $ _node_typename x of Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.") Just ty -> do new_node <- runDBTx $ insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser) $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty) for_ mb_parent $ \parentId -> do $(logLocM) INFO $ "Found document list to import..." let totalDocs = _de_documents docsList let chunks = Split.chunksOf 100 totalDocs forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do let ws = Jobs.WorkSplit { Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk) , Jobs._ws_total = length totalDocs } let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws void $ sendJob $ Jobs.ImportRemoteDocuments payload pure new_node EN_terms x ngramsList -> case lookupDBid $ _node_typename x of Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.") Just ty -> do new_node <- runDBTx $ insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser) $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty) $(logLocM) INFO $ "Found ngrams list to import..." void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList pure new_node insertSimple :: Maybe ParentId -> Node JSON.Value -> m NodeId insertSimple mb_parent x = case lookupDBid $ _node_typename x of Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.") Just ty -> do new_node <- runDBTx $ insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser) $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty) pure new_node insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId] insertTrees currentParent !acc (TreeN x xs) = do childrenRoot <- insertNode currentParent x (`mappend` acc) <$> foldlM (insertTrees (Just childrenRoot)) [childrenRoot] xs remoteExportHandler :: ( MonadIO m, Safe.MonadCatch m , IsGargServer err env m ) => NodeId -> AuthenticatedUser -> Named.RemoteExportRequest -> m [NodeId] remoteExportHandler _rer_node_id loggedInUser Named.RemoteExportRequest{..} = do mgr <- view gargHttpManager nodes <- runDBQuery $ do ns <- getNodes _rer_node_id checkNodesTypeAllowed ns pure ns exportable <- makeExportable (_auth_node_id loggedInUser) nodes liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder exportable)) (mkClientEnv mgr _rer_instance_url) streamDecode) `Safe.catch` \(e :: BackendInternalError) -> throwError $ _BackendInternalError # e makeExportable :: (MonadIO m, IsGargServer err env m) => NodeId -> Tree (Node JSON.Value) -> m (Tree ExportableNode) makeExportable userNodeId (TreeN x xs) | Just nty <- lookupDBid (_node_typename x) = do env <- view hasNodeStory exportableRoot <- case nty of NodeCorpus -> EN_corpus <$> pure x NodeGraph -> EN_graph <$> pure x NodePhylo -> EN_phylo <$> pure x NodeTexts -> EN_document <$> pure x <*> get_document_json userNodeId (_node_id x) NodeList -> EN_terms <$> pure x <*> runDBQuery (getNgramsList env (_node_id x)) Notes -> case JS.parseMaybe JS.parseJSON (_node_hyperdata x) of Nothing -> mk_err " invalid HyperdataFrame inside." Just hframe -> do mgr <- view gargHttpManager exportNote mgr hframe _ -> mk_err $ "invalid (unsupported) note type: " <> show nty children <- mapM (makeExportable userNodeId) xs pure $ TreeN exportableRoot children | otherwise = throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.") where mk_err msg = throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with " <> msg) exportNote :: ( IsGargServer err env m, MonadIO m, MonadLogger m) => HTTP.Manager -> HyperdataFrame -> m ExportableNode exportNote mgr HyperdataFrame{..} = do let download_url = _hf_base <> "/" <> _hf_frame_id <> "/download" case HTTP.parseRequest (T.unpack download_url) of Left err -> do let msg = "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err) $(logLocM) ERROR msg mk_err (T.unpack msg) Right rq -> do res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr) pure $ EN_notes (TE.decodeUtf8 $ BL.toStrict $ res) where mk_err msg = throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "exportNote: " <> msg) importNote :: (MonadIO m, MonadLogger m, HasBackendInternalError err, IsDBCmd env err m) => HTTP.Manager -> T.Text -> GargConfig -> m HyperdataFrame importNote mgr rawText cfg = do let _hf_base = cfg ^. gc_frames . f_write_url case HTTP.parseRequest (T.unpack _hf_base `appendPath` "/new") of Left err -> do let msg = "Couldn't extract a valid URL from " <> _hf_base <> ", " <> T.pack (show err) $(logLocM) ERROR msg mk_err (T.unpack msg) Right rq0 -> do let rq = rq0 { HTTP.method = "POST" , HTTP.requestHeaders = textMarkdown : (HTTP.requestHeaders rq0) , HTTP.requestBody = HTTP.RequestBodyBS (TE.encodeUtf8 rawText) } -- The response will contain (in the redirects) the new path to the notes, where the last fragment -- is the frameId res <- liftIO $ HTTP.withResponseHistory rq mgr $ \redirects -> do let allLocations = map (First . List.lookup HTTP.hLocation . HTTP.responseHeaders . snd) (HTTP.hrRedirects redirects) case getFirst $ mconcat allLocations of Nothing -> pure mempty Just x -> pure x let _hf_frame_id = snd $ T.breakOnEnd "/" (TE.decodeUtf8 res) pure $ HyperdataFrame{..} where mk_err msg = throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "importNote: " <> msg) textMarkdown :: HTTP.Header textMarkdown = (HTTP.hContentType, fromString "text/markdown") -- | Append two URL paths together. The second argument must be given with an initial '/', -- and must be non-null. appendPath :: String -> String -> String appendPath t r = case List.last t of '/' -> t <> List.tail r _ -> t <> r checkNodesTypeAllowed :: HasNodeError e => Tree (Node a) -> DBQuery e x () checkNodesTypeAllowed (TreeN r xs) = do checkNodeTypeAllowed r mapM_ checkNodesTypeAllowed xs checkNodeTypeAllowed :: HasNodeError e => Node a -> DBQuery e x () checkNodeTypeAllowed n | Just nty <- lookupDBid (_node_typename n) , nty `elem` exportableNodeTypes = pure () | otherwise = let msg = "It's possible to export only the following node of type: " <> T.intercalate "," (map (T.pack . show) exportableNodeTypes) in nodeError $ NodeNotExportable (_node_id n) msg -- | At the moment we support only export corpus nodes and their children (i.e. "Docs", "Terms", "Graph"). exportableNodeTypes :: [NodeType] exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo, Notes ] streamEncoder :: MonadIO m => ExpectedPayload -> ConduitT () Named.RemoteBinaryData m () streamEncoder = CL.sourceList . map Named.RemoteBinaryData . BL.toChunks . serialise -- | Returns a conduit which can be used to decode streamDecode :: Either ClientError [NodeId] -> IO [NodeId] streamDecode = \case Left err -> Safe.throwIO $ InternalUnexpectedError (toException $ userError $ show err) Right x -> pure x