Commit b39c1805 authored by Alfredo Di Napoli

Preliminary work to transfer notes

parent b2f7a9a8
Pipeline #7226 passed with stages in 72 minutes and 19 seconds
@@ -18,12 +18,14 @@ import Control.Lens (view, (#))
 import Control.Monad.Except (throwError, MonadError)
 import Control.Monad (void, liftM2, forM_)
 import Data.Aeson qualified as JSON
+import Data.Aeson.Types qualified as JS
 import Data.ByteString.Builder qualified as B
 import Data.ByteString.Lazy qualified as BL
 import Data.Conduit.Combinators qualified as C
 import Data.Conduit.List qualified as CL
 import Data.Foldable (for_, foldlM)
 import Data.List.Split qualified as Split
+import Data.Text.Encoding qualified as TE
 import Data.Text qualified as T
 import Gargantext.API.Admin.Auth
 import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..))
@@ -43,7 +45,8 @@ import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, HasNodeArchiveStoryImmediateSaver)
 import Gargantext.Core.Types.Main
 import Gargantext.Core.Worker.Jobs (sendJob)
 import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
-import Gargantext.Database.Admin.Types.Node hiding (INFO)
+import Gargantext.Database.Admin.Types.Hyperdata.Frame (HyperdataFrame(..))
+import Gargantext.Database.Admin.Types.Node hiding (WARNING, INFO)
 import Gargantext.Database.Prelude (IsDBCmd)
 import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..))
 import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode)
@@ -51,21 +54,29 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
 import Gargantext.Orphans ()
 import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
 import GHC.Generics (Generic)
+import Network.HTTP.Client qualified as HTTP
 import Prelude
 import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError)
 import Servant.Server.Generic (AsServerT)

-data ExportableNode =
-  ExportableNode {
-    _en_node :: Node JSON.Value
-    -- | If this node is a \"docs\" node, remotely export also
-    -- all the associated documents.
-  , _en_associated_docs :: Maybe DocumentExport
-    -- | If this node is a \"terms\" node, remotely export also
-    -- all the associated ngrams
-  , _en_associated_ngrams :: Maybe NgramsList
-  } deriving Generic
+data ExportableNode = ExportableNode
+  { _en_node :: Node JSON.Value
+  , _en_node_payload :: Maybe ExportableNodePayload
+  } deriving Generic
+
+data ExportableNodePayload
+  = -- | If this node is a \"docs\" node, remotely export also
+    -- all the associated documents.
+    ENP_document DocumentExport
+    -- | If this node is a \"terms\" node, remotely export also
+    -- all the associated ngrams
+  | ENP_terms NgramsList
+    -- | If this node is a \"note\" node, remotely export also
+    -- all the raw markdown blob
+  | ENP_notes T.Text
+  deriving Generic

+instance Serialise ExportableNodePayload where
 instance Serialise ExportableNode where

 remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m, HasNodeArchiveStoryImmediateSaver env)
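The empty `instance Serialise ... where` bodies above are deliberate: the `serialise` package provides `Generic`-based defaults for `encode` and `decode`, so deriving `Generic` is all the new payload type needs. A minimal, standalone round-trip sketch with a hypothetical stand-in type (`DocumentExport` and `NgramsList` are project-specific):

{-# LANGUAGE DeriveGeneric #-}
import Codec.Serialise (Serialise, deserialise, serialise)
import Data.Text (Text)
import GHC.Generics (Generic)

-- Hypothetical stand-in for ExportableNodePayload: a sum type whose
-- Serialise instance is filled in generically from its Generic instance.
data DemoPayload
  = Demo_terms [Text]
  | Demo_notes Text
  deriving (Eq, Show, Generic)

instance Serialise DemoPayload where

-- deserialise . serialise == id for a generically-derived payload:
roundTrip :: DemoPayload -> DemoPayload
roundTrip = deserialise . serialise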
@@ -112,25 +123,34 @@ remoteImportHandler loggedInUser c = do
   where
     insertNode :: Maybe NodeId -> ExportableNode -> m NodeId
-    insertNode mb_parent (ExportableNode x mb_docs mb_terms) = case lookupDBid $ _node_typename x of
+    insertNode mb_parent (ExportableNode x mb_payload) = case lookupDBid $ _node_typename x of
       Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
       Just ty -> do
         new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
         $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
-        for_ (liftM2 (,) mb_docs mb_parent) $ \(docsList, parentId) -> do
-          $(logLocM) INFO $ "Found document list to import..."
-          let totalDocs = _de_documents docsList
-          let chunks = Split.chunksOf 100 totalDocs
-          forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
-            let ws = Jobs.WorkSplit
-                  { Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)
-                  , Jobs._ws_total = length totalDocs
-                  }
-            let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
-            void $ sendJob $ Jobs.ImportRemoteDocuments payload
-        for_ mb_terms $ \ngramsList -> do
-          $(logLocM) INFO $ "Found ngrams list to import..."
-          void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
+        for_ (liftM2 (,) mb_payload mb_parent) $ \(exported_payload, parentId) -> do
+          case exported_payload of
+            ENP_document docsList
+              -> do
+                $(logLocM) INFO $ "Found document list to import..."
+                let totalDocs = _de_documents docsList
+                let chunks = Split.chunksOf 100 totalDocs
+                forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
+                  let ws = Jobs.WorkSplit
+                        { Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)
+                        , Jobs._ws_total = length totalDocs
+                        }
+                  let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
+                  void $ sendJob $ Jobs.ImportRemoteDocuments payload
+            ENP_terms ngramsList
+              -> do
+                $(logLocM) INFO $ "Found ngrams list to import..."
+                void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
+            ENP_notes _noteAsMarkdown
+              -> do
+                $(logLocM) INFO $ "Found some markdown notes to import"
+                -- FIXME(adn) actually import the notes
         pure new_node

     insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId]
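The document branch above splits the import into batches of 100 and attaches a `Jobs.WorkSplit` progress marker to each batch. A standalone sketch of that arithmetic (helper name hypothetical) shows what `_ws_current` evaluates to per chunk:

{-# LANGUAGE ImportQualifiedPost #-}
import Data.List.Split qualified as Split

-- (current, total) pairs as computed by the WorkSplit expression above:
-- min total (((ix - 1) * length chunk) + length chunk).
workSplits :: Int -> [doc] -> [(Int, Int)]
workSplits chunkSize docs =
  [ (min total (((ix - 1) * length chunk) + length chunk), total)
  | (ix, chunk) <- zip [1 ..] (Split.chunksOf chunkSize docs)
  ]
  where
    total = length docs

-- workSplits 100 (replicate 250 ()) == [(100,250),(200,250),(150,250)]
-- Note the last pair: the formula scales by the final, shorter chunk's
-- length, so the counter reads 150 rather than 250 for that batch.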
@@ -152,26 +172,42 @@ remoteExportHandler loggedInUser Named.RemoteExportRequest{..} = do
   liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder exportable)) (mkClientEnv mgr _rer_instance_url) streamDecode)
     `Safe.catch` \(e :: BackendInternalError) -> throwError $ _BackendInternalError # e

-makeExportable :: IsGargServer err env m
+makeExportable :: (MonadIO m, IsGargServer err env m)
                => NodeId
                -> Tree (Node JSON.Value)
                -> m (Tree ExportableNode)
 makeExportable userNodeId (TreeN x xs)
   | Just nty <- lookupDBid (_node_typename x)
   = do
-      mb_docs <- case nty of
-        NodeTexts -> Just <$> get_document_json userNodeId (_node_id x)
-        _ -> pure Nothing
-      mb_ngrams <- case nty of
-        NodeList -> Just <$> getNgramsList (_node_id x)
-        _ -> pure Nothing
-      let exportableRoot = ExportableNode x mb_docs mb_ngrams
+      mb_payload <- case nty of
+        NodeTexts -> Just . ENP_document <$> get_document_json userNodeId (_node_id x)
+        NodeList -> Just . ENP_terms <$> getNgramsList (_node_id x)
+        Notes -> case JS.parseMaybe JS.parseJSON (_node_hyperdata x) of
+          Nothing
+            -> pure Nothing
+          Just hframe
+            -> do
+              mgr <- view gargHttpManager
+              tryExportNode mgr hframe
+        _ -> pure Nothing
+      let exportableRoot = ExportableNode x mb_payload
       children <- mapM (makeExportable userNodeId) xs
       pure $ TreeN exportableRoot children
   | otherwise
   = throwError $ _BackendInternalError
       # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")

+tryExportNode :: (MonadIO m, MonadLogger m) => HTTP.Manager -> HyperdataFrame -> m (Maybe ExportableNodePayload)
+tryExportNode mgr HyperdataFrame{..} = do
+  let download_url = _hf_base <> "/" <> _hf_frame_id <> "/download"
+  case HTTP.parseRequest (T.unpack download_url) of
+    Left err -> do
+      $(logLocM) WARNING $ "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err)
+      pure Nothing
+    Right rq -> do
+      res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr)
+      pure $ Just $ ENP_notes (TE.decodeUtf8 $ BL.toStrict $ res)

 checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m ()
 checkNodesTypeAllowed (TreeN r xs) = do
   checkNodeTypeAllowed r
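One caveat in `tryExportNode`: `TE.decodeUtf8` throws on invalid UTF-8, so a malformed frame body would escape the `Maybe`. A hypothetical variant (not part of this commit) using the total `decodeUtf8'` would degrade to `Nothing`, like the invalid-URL branch does:

{-# LANGUAGE ImportQualifiedPost #-}
import Data.ByteString.Lazy qualified as BL
import Data.Text (Text)
import Data.Text.Encoding qualified as TE
import Network.HTTP.Client qualified as HTTP

-- Hypothetical sketch of the download-and-decode step; parseRequest is
-- used at Either, exactly as in the handler above.
downloadNotes :: HTTP.Manager -> String -> IO (Maybe Text)
downloadNotes mgr url =
  case HTTP.parseRequest url of
    Left _err -> pure Nothing  -- not a valid URL; mirrors the WARNING branch
    Right rq -> do
      body <- HTTP.responseBody <$> HTTP.httpLbs rq mgr
      -- invalid UTF-8 degrades to Nothing instead of throwing:
      pure $ either (const Nothing) Just (TE.decodeUtf8' (BL.toStrict body))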
@@ -188,7 +224,7 @@ checkNodeTypeAllowed n

 -- | At the moment we support only export corpus nodes and their children (i.e. "Docs", "Terms", "Graph").
 exportableNodeTypes :: [NodeType]
-exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo ]
+exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo, Notes ]

 streamEncoder :: MonadIO m => ExpectedPayload -> ConduitT () Named.RemoteBinaryData m ()
 streamEncoder = CL.sourceList . map Named.RemoteBinaryData . BL.toChunks . serialise
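`streamEncoder` leans on the fact that `serialise` produces a lazy `ByteString` whose strict chunks can be shipped one `Named.RemoteBinaryData` at a time; the receiving side only has to reassemble the chunks before decoding. A standalone sketch of that round-trip (helper names hypothetical):

{-# LANGUAGE ImportQualifiedPost #-}
import Codec.Serialise (Serialise, deserialise, serialise)
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL

-- Split a serialised value into the strict chunks that would be sent
-- over the wire, one per Named.RemoteBinaryData in the handler.
encodeChunks :: Serialise a => a -> [BS.ByteString]
encodeChunks = BL.toChunks . serialise

-- Reassemble the received chunks lazily and decode.
decodeChunks :: Serialise a => [BS.ByteString] -> a
decodeChunks = deserialise . BL.fromChunks

-- decodeChunks (encodeChunks [1, 2, 3 :: Int]) == [1, 2, 3]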