Commit 1fe60d75 authored by Alfredo Di Napoli

Refactor exporting and transferring of nodes

parent b39c1805
Pipeline #7233 passed with stages
in 66 minutes and 46 seconds
@@ -14,9 +14,9 @@ import Codec.Serialise
import Conduit
import Control.Exception.Safe qualified as Safe
import Control.Exception (toException)
import Control.Lens (view, (#))
import Control.Lens (view, (#), (^.))
import Control.Monad.Except (throwError, MonadError)
import Control.Monad (void, liftM2, forM_)
import Control.Monad (void, forM_)
import Data.Aeson qualified as JSON
import Data.Aeson.Types qualified as JS
import Data.ByteString.Builder qualified as B
@@ -25,6 +25,7 @@ import Data.Conduit.Combinators qualified as C
import Data.Conduit.List qualified as CL
import Data.Foldable (for_, foldlM)
import Data.List.Split qualified as Split
import Data.String (IsString(..))
import Data.Text.Encoding qualified as TE
import Data.Text qualified as T
import Gargantext.API.Admin.Auth
@@ -39,44 +40,56 @@ import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Client (remoteImportClient)
import Gargantext.API.Routes.Named.Remote qualified as Named
import Gargantext.Core.Config
import Gargantext.Core.Config.Types (f_write_url)
import Gargantext.Core (lookupDBid)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, HasNodeArchiveStoryImmediateSaver)
import Gargantext.Core.Types.Main
import Gargantext.Core.Worker.Jobs (sendJob)
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Admin.Types.Hyperdata.Default (DefaultHyperdata(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame (HyperdataFrame(..))
import Gargantext.Database.Admin.Types.Node hiding (WARNING, INFO)
import Gargantext.Database.Admin.Types.Node hiding (ERROR, WARNING, INFO)
import Gargantext.Database.Prelude (IsDBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..))
import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode)
import Gargantext.Database.Query.Table.Node qualified as DB
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Orphans ()
import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
import GHC.Generics (Generic)
import Network.HTTP.Client qualified as HTTP
import Prelude
import qualified Network.HTTP.Types.Header as HTTP
import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError)
import Servant.Server.Generic (AsServerT)
data ExportableNode = ExportableNode
{ _en_node :: Node JSON.Value
, _en_node_payload :: Maybe ExportableNodePayload
} deriving Generic
data ExportableNodePayload
= -- | If this node is a \"docs\" node, remotely export also
data ExportableNode =
EN_corpus (Node JSON.Value)
| EN_graph (Node JSON.Value)
| EN_phylo (Node JSON.Value)
-- | If this node is a \"docs\" node, remotely export also
-- all the associated documents.
ENP_document DocumentExport
| EN_document (Node JSON.Value) DocumentExport
-- | If this node is a \"terms\" node, remotely export also
-- all the associated ngrams
| ENP_terms NgramsList
-- | If this node is a \"note\" node, remotely export also
-- all the raw markdown blob
| ENP_notes T.Text
| EN_terms (Node JSON.Value) NgramsList
-- | For notes nodes we don't have any node to import
-- because all the details about the frame service
-- would be different at the destination, and have
-- to be recomputed from scratch.
| EN_notes T.Text
deriving Generic
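-- Illustrative reading of the constructors above: a corpus travels on its own as
-- 'EN_corpus node', a terms node carries its ngrams alongside ('EN_terms node ngrams'),
-- and notes travel only as a raw markdown blob ('EN_notes markdown').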
instance Serialise ExportableNodePayload where
renderExportableNode :: ExportableNode -> T.Text
renderExportableNode = \case
EN_corpus{} -> "corpus node"
EN_graph{} -> "graph node"
EN_phylo{} -> "phylo node"
EN_document{} -> "document node"
EN_terms{} -> "terms node"
EN_notes{} -> "notes node"
instance Serialise ExportableNode where
remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m, HasNodeArchiveStoryImmediateSaver env)
@@ -99,6 +112,8 @@ remoteImportHandler :: forall err env m.
, IsDBCmd env err m
, HasNLPServer env
, MonadLogger m
, HasConfig env
, HasManager env
, MonadIO m)
=> AuthenticatedUser
-> ConduitT () Named.RemoteBinaryData IO ()
@@ -110,11 +125,11 @@ remoteImportHandler loggedInUser c = do
case deserialiseOrFail @ExpectedPayload (B.toLazyByteString $ mconcat chunks) of
Left err -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "Deserialization error: " ++ show err)
Right (TreeN x xs) -> do
$(logLocM) INFO $ "Importing " <> T.pack (show $ _node_id $ _en_node $ x)
$(logLocM) INFO $ "Importing " <> renderExportableNode x
-- NOTE(adn) By default, we append the imported node(s) to the user's
-- private folder.
privateFolderId <- _node_id <$> getUserRootPrivateNode (_auth_user_id loggedInUser)
$(logLocM) INFO $ "Attaching " <> T.pack (show $ _node_id $ _en_node $ x) <> " to private folder " <> T.pack (show privateFolderId)
$(logLocM) INFO $ "Attaching " <> renderExportableNode x <> " to private folder " <> T.pack (show privateFolderId)
-- Attempts to insert nodes as we go along.
rootNode <- insertNode (Just privateFolderId) x
nodes <- foldlM (insertTrees (Just rootNode)) [rootNode] xs
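-- 'foldlM' threads the accumulated list of inserted NodeIds (seeded with the freshly
-- created root) through each child subtree, attaching every top-level child under rootNode.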
@@ -123,34 +138,62 @@
where
insertNode :: Maybe NodeId -> ExportableNode -> m NodeId
insertNode mb_parent (ExportableNode x mb_payload) = case lookupDBid $ _node_typename x of
insertNode mb_parent exported_node = case exported_node of
EN_corpus x -> insertSimple mb_parent x
EN_graph x -> insertSimple mb_parent x
EN_phylo x -> insertSimple mb_parent x
EN_notes noteAsMarkdown -> do
case mb_parent of
Nothing ->
throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "No parent id found, I cannot attach this note.")
Just parentId -> do
$(logLocM) INFO $ "Found some markdown notes to import..."
-- NOTE: Unfortunately we cannot rely on the settings that the hyperdata frame
-- is sending us, because both the frame Id and the base URL would be different
-- on the target instance.
mgr <- view gargHttpManager
cfg <- view hasConfig
newHyperdataFrame <- importNote mgr noteAsMarkdown cfg
-- TODO(adn) Import with the valid name.
new_node <- DB.insertNode Notes (Just "Imported note")
(Just $ DefaultFrameCode newHyperdataFrame) parentId (_auth_user_id loggedInUser)
pure new_node
EN_document x docsList -> case lookupDBid $ _node_typename x of
Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
Just ty -> do
new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
$(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
for_ mb_parent $ \parentId -> do
$(logLocM) INFO $ "Found document list to import..."
let totalDocs = _de_documents docsList
let chunks = Split.chunksOf 100 totalDocs
forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
let ws = Jobs.WorkSplit
{ Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * 100) + length chunk)
, Jobs._ws_total = length totalDocs
}
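-- Worked example (assuming the 100-document chunk size above): with 250 documents the
-- chunks have sizes [100,100,50], so after the third chunk _ws_current is
-- min 250 ((3 - 1) * 100 + 50) = 250, while _ws_total stays at 250 throughout.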
let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
void $ sendJob $ Jobs.ImportRemoteDocuments payload
pure new_node
EN_terms x ngramsList -> case lookupDBid $ _node_typename x of
Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
Just ty -> do
new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
$(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
$(logLocM) INFO $ "Found ngrams list to import..."
void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
pure new_node
insertSimple :: Maybe ParentId -> Node JSON.Value -> m NodeId
insertSimple mb_parent x = case lookupDBid $ _node_typename x of
Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
Just ty -> do
new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
$(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
for_ (liftM2 (,) mb_payload mb_parent) $ \(exported_payload, parentId) -> do
case exported_payload of
ENP_document docsList
-> do
$(logLocM) INFO $ "Found document list to import..."
let totalDocs = _de_documents docsList
let chunks = Split.chunksOf 100 totalDocs
forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
let ws = Jobs.WorkSplit
{ Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)
, Jobs._ws_total = length totalDocs
}
let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
void $ sendJob $ Jobs.ImportRemoteDocuments payload
ENP_terms ngramsList
-> do
$(logLocM) INFO $ "Found ngrams list to import..."
void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
ENP_notes _noteAsMarkdown
-> do
$(logLocM) INFO $ "Found some markdown notes to import"
-- FIXME(adn) actually import the notes
pure new_node
insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId]
@@ -179,34 +222,76 @@ makeExportable :: (MonadIO m, IsGargServer err env m)
makeExportable userNodeId (TreeN x xs)
| Just nty <- lookupDBid (_node_typename x)
= do
mb_payload <- case nty of
NodeTexts -> Just . ENP_document <$> get_document_json userNodeId (_node_id x)
NodeList -> Just . ENP_terms <$> getNgramsList (_node_id x)
Notes -> case JS.parseMaybe JS.parseJSON (_node_hyperdata x) of
exportableRoot <- case nty of
NodeCorpus -> EN_corpus <$> pure x
NodeGraph -> EN_graph <$> pure x
NodePhylo -> EN_phylo <$> pure x
NodeTexts -> EN_document <$> pure x <*> get_document_json userNodeId (_node_id x)
NodeList -> EN_terms <$> pure x <*> getNgramsList (_node_id x)
Notes -> case JS.parseMaybe JS.parseJSON (_node_hyperdata x) of
Nothing
-> pure Nothing
-> mk_err "invalid HyperdataFrame inside."
Just hframe
-> do
mgr <- view gargHttpManager
tryExportNode mgr hframe
_ -> pure Nothing
let exportableRoot = ExportableNode x mb_payload
exportNote mgr hframe
_ -> mk_err $ "invalid (unsupported) node type: " <> show nty
children <- mapM (makeExportable userNodeId) xs
pure $ TreeN exportableRoot children
| otherwise
= throwError $ _BackendInternalError
# InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
where
mk_err msg =
throwError $ _BackendInternalError
# InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with " <> msg)
tryExportNode :: (MonadIO m, MonadLogger m) => HTTP.Manager -> HyperdataFrame -> m (Maybe ExportableNodePayload)
tryExportNode mgr HyperdataFrame{..} = do
exportNote :: ( IsGargServer err env m, MonadIO m, MonadLogger m)
=> HTTP.Manager -> HyperdataFrame -> m ExportableNode
exportNote mgr HyperdataFrame{..} = do
let download_url = _hf_base <> "/" <> _hf_frame_id <> "/download"
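-- With hypothetical values _hf_base = "https://write.example.org" and
-- _hf_frame_id = "abc123", this fetches "https://write.example.org/abc123/download".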
case HTTP.parseRequest (T.unpack download_url) of
Left err -> do
$(logLocM) WARNING $ "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err)
pure Nothing
let msg = "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err)
$(logLocM) ERROR msg
mk_err (T.unpack msg)
Right rq -> do
res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr)
pure $ Just $ ENP_notes (TE.decodeUtf8 $ BL.toStrict $ res)
pure $ EN_notes (TE.decodeUtf8 $ BL.toStrict $ res)
where
mk_err msg =
throwError $ _BackendInternalError
# InternalUnexpectedError (toException $ userError $ "exportNote: " <> msg)
importNote :: (MonadIO m, MonadLogger m, HasBackendInternalError err, IsDBCmd env err m)
=> HTTP.Manager
-> T.Text
-> GargConfig
-> m HyperdataFrame
importNote mgr rawText cfg = do
let _hf_base = cfg ^. gc_frames . f_write_url
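-- The base URL comes from the *destination* instance's own configuration (see the
-- NOTE in insertNode above): the frame settings shipped with the exported node would
-- still point at the source instance.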
case HTTP.parseRequest (T.unpack _hf_base) of
Left err -> do
let msg = "Couldn't extract a valid URL from " <> _hf_base <> ", " <> T.pack (show err)
$(logLocM) ERROR msg
mk_err (T.unpack msg)
Right rq0 -> do
let rq = rq0 { HTTP.method = "POST"
, HTTP.requestHeaders = textMarkdown : (HTTP.requestHeaders rq0)
, HTTP.requestBody = HTTP.RequestBodyBS (TE.encodeUtf8 rawText)
}
-- The response will contain the new path to the notes, where the last fragment
-- is the frameId
res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr)
let _hf_frame_id = snd $ T.breakOnEnd "/" (TE.decodeUtf8 $ BL.toStrict res)
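-- e.g. (hypothetical response body) T.breakOnEnd "/" "https://write.example.org/abc123"
-- yields ("https://write.example.org/", "abc123"), so 'snd' keeps just the frame id.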
pure $ HyperdataFrame{..}
where
mk_err msg =
throwError $ _BackendInternalError
# InternalUnexpectedError (toException $ userError $ "importNote: " <> msg)
textMarkdown :: HTTP.Header
textMarkdown = (HTTP.hContentType, fromString "text/markdown")
checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m ()
checkNodesTypeAllowed (TreeN r xs) = do
......