Commit 1fe60d75 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Refactor exporting and transferring of nodes

parent b39c1805
Pipeline #7233 passed with stages in 66 minutes and 46 seconds
@@ -14,9 +14,9 @@ import Codec.Serialise
 import Conduit
 import Control.Exception.Safe qualified as Safe
 import Control.Exception (toException)
-import Control.Lens (view, (#))
+import Control.Lens (view, (#), (^.))
 import Control.Monad.Except (throwError, MonadError)
-import Control.Monad (void, liftM2, forM_)
+import Control.Monad (void, forM_)
 import Data.Aeson qualified as JSON
 import Data.Aeson.Types qualified as JS
 import Data.ByteString.Builder qualified as B
@@ -25,6 +25,7 @@ import Data.Conduit.Combinators qualified as C
 import Data.Conduit.List qualified as CL
 import Data.Foldable (for_, foldlM)
 import Data.List.Split qualified as Split
+import Data.String (IsString(..))
 import Data.Text.Encoding qualified as TE
 import Data.Text qualified as T
 import Gargantext.API.Admin.Auth
@@ -39,44 +40,56 @@ import Gargantext.API.Prelude (IsGargServer)
 import Gargantext.API.Routes.Client (remoteImportClient)
 import Gargantext.API.Routes.Named.Remote qualified as Named
 import Gargantext.Core.Config
+import Gargantext.Core.Config.Types (f_write_url)
 import Gargantext.Core (lookupDBid)
 import Gargantext.Core.NLP (HasNLPServer)
 import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, HasNodeArchiveStoryImmediateSaver)
 import Gargantext.Core.Types.Main
 import Gargantext.Core.Worker.Jobs (sendJob)
 import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
+import Gargantext.Database.Admin.Types.Hyperdata.Default (DefaultHyperdata(..))
 import Gargantext.Database.Admin.Types.Hyperdata.Frame (HyperdataFrame(..))
-import Gargantext.Database.Admin.Types.Node hiding (WARNING, INFO)
+import Gargantext.Database.Admin.Types.Node hiding (ERROR, WARNING, INFO)
 import Gargantext.Database.Prelude (IsDBCmd)
 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..))
 import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode)
+import Gargantext.Database.Query.Table.Node qualified as DB
 import Gargantext.Database.Schema.Node (NodePoly(..))
 import Gargantext.Orphans ()
 import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
 import GHC.Generics (Generic)
 import Network.HTTP.Client qualified as HTTP
 import Prelude
+import qualified Network.HTTP.Types.Header as HTTP
 import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError)
 import Servant.Server.Generic (AsServerT)
-data ExportableNode = ExportableNode
-  { _en_node :: Node JSON.Value
-  , _en_node_payload :: Maybe ExportableNodePayload
-  } deriving Generic
-data ExportableNodePayload
-  = -- | If this node is a \"docs\" node, remotely export also
+data ExportableNode =
+    EN_corpus (Node JSON.Value)
+  | EN_graph (Node JSON.Value)
+  | EN_phylo (Node JSON.Value)
+    -- | If this node is a \"docs\" node, remotely export also
     -- all the associated documents.
-    ENP_document DocumentExport
+  | EN_document (Node JSON.Value) DocumentExport
     -- | If this node is a \"terms\" node, remotely export also
     -- all the associated ngrams
-  | ENP_terms NgramsList
-    -- | If this node is a \"note\" node, remotely export also
-    -- all the raw markdown blob
-  | ENP_notes T.Text
+  | EN_terms (Node JSON.Value) NgramsList
+    -- | For notes nodes we don't have any node to import
+    -- because all the details about the frame service
+    -- would be different at the destination, and have
+    -- to be recomputed from scratch.
+  | EN_notes T.Text
   deriving Generic
-instance Serialise ExportableNodePayload where
+renderExportableNode :: ExportableNode -> T.Text
+renderExportableNode = \case
+  EN_corpus{}   -> "corpus node"
+  EN_graph{}    -> "graph node"
+  EN_phylo{}    -> "phylo node"
+  EN_document{} -> "document node"
+  EN_terms{}    -> "terms node"
+  EN_notes{}    -> "notes node"
 instance Serialise ExportableNode where
 remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m, HasNodeArchiveStoryImmediateSaver env)
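The `Serialise ExportableNode` instance above is left empty on purpose: with `deriving Generic` on the data type, the `serialise` package supplies CBOR `encode`/`decode` through its Generic defaults. A minimal round-trip sketch (not part of this commit; `someNode` is a placeholder for any `Node JSON.Value`):

-- Assumes the imports already present in this module (Codec.Serialise,
-- Data.Aeson qualified as JSON); `someNode` is a hypothetical value.
roundTripExportable :: Node JSON.Value -> Either DeserialiseFailure ExportableNode
roundTripExportable someNode =
  deserialiseOrFail (serialise (EN_corpus someNode))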
@@ -99,6 +112,8 @@ remoteImportHandler :: forall err env m.
      , IsDBCmd env err m
      , HasNLPServer env
      , MonadLogger m
+     , HasConfig env
+     , HasManager env
      , MonadIO m)
   => AuthenticatedUser
   -> ConduitT () Named.RemoteBinaryData IO ()
@@ -110,11 +125,11 @@ remoteImportHandler loggedInUser c = do
   case deserialiseOrFail @ExpectedPayload (B.toLazyByteString $ mconcat chunks) of
     Left err -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "Deserialization error: " ++ show err)
     Right (TreeN x xs) -> do
-      $(logLocM) INFO $ "Importing " <> T.pack (show $ _node_id $ _en_node $ x)
+      $(logLocM) INFO $ "Importing " <> renderExportableNode x
       -- NOTE(adn) By default, we append the imported node(s) to the user's
       -- private folder.
       privateFolderId <- _node_id <$> getUserRootPrivateNode (_auth_user_id loggedInUser)
-      $(logLocM) INFO $ "Attaching " <> T.pack (show $ _node_id $ _en_node $ x) <> " to private folder " <> T.pack (show privateFolderId)
+      $(logLocM) INFO $ "Attaching " <> renderExportableNode x <> " to private folder " <> T.pack (show privateFolderId)
       -- Attempts to insert nodes as we go along.
       rootNode <- insertNode (Just privateFolderId) x
       nodes <- foldlM (insertTrees (Just rootNode)) [rootNode] xs
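A note on the `foldlM` call above: it threads the accumulated list of inserted node ids through each subtree in order, so `nodes` ends up as the root id followed by every id returned by `insertTrees`. A pure toy model of the same shape (illustration only, not in the commit):

-- ghci> import Data.Foldable (foldlM)
-- ghci> import Data.Functor.Identity (runIdentity)
-- ghci> runIdentity $ foldlM (\acc x -> pure (acc ++ [x])) [0] [1, 2, 3]
-- [0,1,2,3]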
@@ -123,34 +138,62 @@ remoteImportHandler loggedInUser c = do
   where
     insertNode :: Maybe NodeId -> ExportableNode -> m NodeId
-    insertNode mb_parent (ExportableNode x mb_payload) = case lookupDBid $ _node_typename x of
+    insertNode mb_parent exported_node = case exported_node of
+      EN_corpus x -> insertSimple mb_parent x
+      EN_graph x -> insertSimple mb_parent x
+      EN_phylo x -> insertSimple mb_parent x
+      EN_notes noteAsMarkdown -> do
+        case mb_parent of
+          Nothing ->
+            throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "No parent id found, I cannot attach this note.")
+          Just parentId -> do
+            $(logLocM) INFO $ "Found some markdown notes to import..."
+            -- NOTE: Unfortunately we cannot rely on the settings that the hyperdata frame
+            -- is sending us, because both the frame Id and the base URL would be different
+            -- on the target instance.
+            mgr <- view gargHttpManager
+            cfg <- view hasConfig
+            newHyperdataFrame <- importNote mgr noteAsMarkdown cfg
+            -- TODO(adn) Import with the valid name.
+            new_node <- DB.insertNode Notes (Just "Imported note")
+                                      (Just $ DefaultFrameCode newHyperdataFrame) parentId (_auth_user_id loggedInUser)
+            pure new_node
+      EN_document x docsList -> case lookupDBid $ _node_typename x of
+        Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
+        Just ty -> do
+          new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
+          $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
+          for_ mb_parent $ \parentId -> do
+            $(logLocM) INFO $ "Found document list to import..."
+            let totalDocs = _de_documents docsList
+            let chunks = Split.chunksOf 100 totalDocs
+            forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
+              let ws = Jobs.WorkSplit
+                    { Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)
+                    , Jobs._ws_total = length totalDocs
+                    }
+              let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
+              void $ sendJob $ Jobs.ImportRemoteDocuments payload
+          pure new_node
+      EN_terms x ngramsList -> case lookupDBid $ _node_typename x of
+        Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
+        Just ty -> do
+          new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
+          $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
+          $(logLocM) INFO $ "Found ngrams list to import..."
+          void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
+          pure new_node
+    insertSimple :: Maybe ParentId -> Node JSON.Value -> m NodeId
+    insertSimple mb_parent x = case lookupDBid $ _node_typename x of
       Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
       Just ty -> do
        new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
        $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
-        for_ (liftM2 (,) mb_payload mb_parent) $ \(exported_payload, parentId) -> do
-          case exported_payload of
-            ENP_document docsList
-              -> do
-                $(logLocM) INFO $ "Found document list to import..."
-                let totalDocs = _de_documents docsList
-                let chunks = Split.chunksOf 100 totalDocs
-                forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
-                  let ws = Jobs.WorkSplit
-                        { Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)
-                        , Jobs._ws_total = length totalDocs
-                        }
-                  let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
-                  void $ sendJob $ Jobs.ImportRemoteDocuments payload
-            ENP_terms ngramsList
-              -> do
-                $(logLocM) INFO $ "Found ngrams list to import..."
-                void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
-            ENP_notes _noteAsMarkdown
-              -> do
-                $(logLocM) INFO $ "Found some markdown notes to import"
-                -- FIXME(adn) actually import the notes
        pure new_node
     insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId]
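To make the `WorkSplit` progress arithmetic in the `EN_document` branch concrete, here is a self-contained sketch (assumed numbers: 250 documents with the same chunk size of 100; none of these names exist in the module):

-- Mirrors `min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)`.
import Data.List.Split qualified as Split

workSplitDemo :: [(Int, Int)]
workSplitDemo =
  [ (local_ix, min (length docs) (((local_ix - 1) * length chunk) + length chunk))
  | (local_ix, chunk) <- zip [1 ..] (Split.chunksOf 100 docs)
  ]
  where
    docs = [1 .. 250 :: Int]

-- ghci> workSplitDemo
-- [(1,100),(2,200),(3,150)]
-- The dip on the final, shorter chunk comes from multiplying by the current
-- chunk's length; a fixed chunk-size constant would report 250 there instead.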
@@ -179,34 +222,76 @@ makeExportable :: (MonadIO m, IsGargServer err env m)
 makeExportable userNodeId (TreeN x xs)
   | Just nty <- lookupDBid (_node_typename x)
   = do
-      mb_payload <- case nty of
-        NodeTexts -> Just . ENP_document <$> get_document_json userNodeId (_node_id x)
-        NodeList -> Just . ENP_terms <$> getNgramsList (_node_id x)
+      exportableRoot <- case nty of
+        NodeCorpus -> EN_corpus <$> pure x
+        NodeGraph -> EN_graph <$> pure x
+        NodePhylo -> EN_phylo <$> pure x
+        NodeTexts -> EN_document <$> pure x <*> get_document_json userNodeId (_node_id x)
+        NodeList -> EN_terms <$> pure x <*> getNgramsList (_node_id x)
         Notes -> case JS.parseMaybe JS.parseJSON (_node_hyperdata x) of
           Nothing
-            -> pure Nothing
+            -> mk_err " invalid HyperdataFrame inside."
           Just hframe
            -> do
              mgr <- view gargHttpManager
-              tryExportNode mgr hframe
-        _ -> pure Nothing
-      let exportableRoot = ExportableNode x mb_payload
+              exportNote mgr hframe
+        _ -> mk_err $ "invalid (unsupported) note type: " <> show nty
       children <- mapM (makeExportable userNodeId) xs
      pure $ TreeN exportableRoot children
   | otherwise
   = throwError $ _BackendInternalError
      # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
+  where
+    mk_err msg =
+      throwError $ _BackendInternalError
+        # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with " <> msg)
-tryExportNode :: (MonadIO m, MonadLogger m) => HTTP.Manager -> HyperdataFrame -> m (Maybe ExportableNodePayload)
-tryExportNode mgr HyperdataFrame{..} = do
+exportNote :: ( IsGargServer err env m, MonadIO m, MonadLogger m)
+           => HTTP.Manager -> HyperdataFrame -> m ExportableNode
+exportNote mgr HyperdataFrame{..} = do
   let download_url = _hf_base <> "/" <> _hf_frame_id <> "/download"
   case HTTP.parseRequest (T.unpack download_url) of
     Left err -> do
-      $(logLocM) WARNING $ "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err)
-      pure Nothing
+      let msg = "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err)
+      $(logLocM) ERROR msg
+      mk_err (T.unpack msg)
     Right rq -> do
       res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr)
-      pure $ Just $ ENP_notes (TE.decodeUtf8 $ BL.toStrict $ res)
+      pure $ EN_notes (TE.decodeUtf8 $ BL.toStrict $ res)
+  where
+    mk_err msg =
+      throwError $ _BackendInternalError
+        # InternalUnexpectedError (toException $ userError $ "exportNote: " <> msg)
+importNote :: (MonadIO m, MonadLogger m, HasBackendInternalError err, IsDBCmd env err m)
+           => HTTP.Manager
+           -> T.Text
+           -> GargConfig
+           -> m HyperdataFrame
+importNote mgr rawText cfg = do
+  let _hf_base = cfg ^. gc_frames . f_write_url
+  case HTTP.parseRequest (T.unpack _hf_base) of
+    Left err -> do
+      let msg = "Couldn't extract a valid URL from " <> _hf_base <> ", " <> T.pack (show err)
+      $(logLocM) ERROR msg
+      mk_err (T.unpack msg)
+    Right rq0 -> do
+      let rq = rq0 { HTTP.method = "POST"
+                   , HTTP.requestHeaders = textMarkdown : (HTTP.requestHeaders rq0)
+                   , HTTP.requestBody = HTTP.RequestBodyBS (TE.encodeUtf8 rawText)
+                   }
+      -- The response will contain the new path to the notes, where the last fragment
+      -- is the frameId.
+      res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr)
+      let _hf_frame_id = snd $ T.breakOnEnd "/" (TE.decodeUtf8 $ BL.toStrict res)
+      pure $ HyperdataFrame{..}
+  where
+    mk_err msg =
+      throwError $ _BackendInternalError
+        # InternalUnexpectedError (toException $ userError $ "importNote: " <> msg)
+textMarkdown :: HTTP.Header
+textMarkdown = (HTTP.hContentType, fromString "text/markdown")
 checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m ()
 checkNodesTypeAllowed (TreeN r xs) = do
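One detail of `importNote` worth spelling out: the new frame id is recovered purely textually from the response path. `T.breakOnEnd` splits a `Text` around the last occurrence of the pattern, so `snd` keeps everything after the final slash. A tiny sketch (the URL is invented, not a real frames endpoint):

frameIdFromResponse :: T.Text -> T.Text
frameIdFromResponse = snd . T.breakOnEnd "/"

-- ghci> frameIdFromResponse "https://frames.example.org/notes/abc123"
-- "abc123"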