Commit b39c1805 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Preliminary work to transfer notes

parent b2f7a9a8
Pipeline #7226 passed with stages
in 72 minutes and 19 seconds
...@@ -18,12 +18,14 @@ import Control.Lens (view, (#)) ...@@ -18,12 +18,14 @@ import Control.Lens (view, (#))
import Control.Monad.Except (throwError, MonadError) import Control.Monad.Except (throwError, MonadError)
import Control.Monad (void, liftM2, forM_) import Control.Monad (void, liftM2, forM_)
import Data.Aeson qualified as JSON import Data.Aeson qualified as JSON
import Data.Aeson.Types qualified as JS
import Data.ByteString.Builder qualified as B import Data.ByteString.Builder qualified as B
import Data.ByteString.Lazy qualified as BL import Data.ByteString.Lazy qualified as BL
import Data.Conduit.Combinators qualified as C import Data.Conduit.Combinators qualified as C
import Data.Conduit.List qualified as CL import Data.Conduit.List qualified as CL
import Data.Foldable (for_, foldlM) import Data.Foldable (for_, foldlM)
import Data.List.Split qualified as Split import Data.List.Split qualified as Split
import Data.Text.Encoding qualified as TE
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Admin.Auth import Gargantext.API.Admin.Auth
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..)) import Gargantext.API.Admin.Auth.Types (AuthenticatedUser(..))
...@@ -43,7 +45,8 @@ import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, HasNodeArchiveStoryImme ...@@ -43,7 +45,8 @@ import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, HasNodeArchiveStoryImme
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Core.Worker.Jobs (sendJob) import Gargantext.Core.Worker.Jobs (sendJob)
import Gargantext.Core.Worker.Jobs.Types qualified as Jobs import Gargantext.Core.Worker.Jobs.Types qualified as Jobs
import Gargantext.Database.Admin.Types.Node hiding (INFO) import Gargantext.Database.Admin.Types.Hyperdata.Frame (HyperdataFrame(..))
import Gargantext.Database.Admin.Types.Node hiding (WARNING, INFO)
import Gargantext.Database.Prelude (IsDBCmd) import Gargantext.Database.Prelude (IsDBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..)) import Gargantext.Database.Query.Table.Node.Error (HasNodeError, nodeError, NodeError (..))
import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode) import Gargantext.Database.Query.Table.Node (insertNodeWithHyperdata, getNodes, getUserRootPrivateNode)
...@@ -51,21 +54,29 @@ import Gargantext.Database.Schema.Node (NodePoly(..)) ...@@ -51,21 +54,29 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Orphans () import Gargantext.Orphans ()
import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger) import Gargantext.System.Logging (logLocM, LogLevel(..), MonadLogger)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Network.HTTP.Client qualified as HTTP
import Prelude import Prelude
import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError) import Servant.Client.Streaming (mkClientEnv, withClientM, ClientError)
import Servant.Server.Generic (AsServerT) import Servant.Server.Generic (AsServerT)
data ExportableNode = data ExportableNode = ExportableNode
ExportableNode { { _en_node :: Node JSON.Value
_en_node :: Node JSON.Value , _en_node_payload :: Maybe ExportableNodePayload
-- | If this node is a \"docs\" node, remotely export also } deriving Generic
data ExportableNodePayload
= -- | If this node is a \"docs\" node, remotely export also
-- all the associated documents. -- all the associated documents.
, _en_associated_docs :: Maybe DocumentExport ENP_document DocumentExport
-- | If this node is a \"terms\" node, remotely export also -- | If this node is a \"terms\" node, remotely export also
-- all the associated ngrams -- all the associated ngrams
, _en_associated_ngrams :: Maybe NgramsList | ENP_terms NgramsList
} deriving Generic -- | If this node is a \"note\" node, remotely export also
-- all the raw markdown blob
| ENP_notes T.Text
deriving Generic
instance Serialise ExportableNodePayload where
instance Serialise ExportableNode where instance Serialise ExportableNode where
remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m, HasNodeArchiveStoryImmediateSaver env) remoteAPI :: (MonadIO m, IsGargServer env BackendInternalError m, HasNodeArchiveStoryImmediateSaver env)
...@@ -112,25 +123,34 @@ remoteImportHandler loggedInUser c = do ...@@ -112,25 +123,34 @@ remoteImportHandler loggedInUser c = do
where where
insertNode :: Maybe NodeId -> ExportableNode -> m NodeId insertNode :: Maybe NodeId -> ExportableNode -> m NodeId
insertNode mb_parent (ExportableNode x mb_docs mb_terms) = case lookupDBid $ _node_typename x of insertNode mb_parent (ExportableNode x mb_payload) = case lookupDBid $ _node_typename x of
Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.") Nothing -> throwError $ _BackendInternalError # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
Just ty -> do Just ty -> do
new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser) new_node <- insertNodeWithHyperdata ty (_node_name x) (_node_hyperdata x) mb_parent (_auth_user_id loggedInUser)
$(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty) $(logLocM) INFO $ "Created a new node " <> T.pack (show $ new_node) <> " of type " <> T.pack (show ty)
for_ (liftM2 (,) mb_docs mb_parent) $ \(docsList, parentId) -> do for_ (liftM2 (,) mb_payload mb_parent) $ \(exported_payload, parentId) -> do
$(logLocM) INFO $ "Found document list to import..." case exported_payload of
let totalDocs = _de_documents docsList ENP_document docsList
let chunks = Split.chunksOf 100 totalDocs -> do
forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do $(logLocM) INFO $ "Found document list to import..."
let ws = Jobs.WorkSplit let totalDocs = _de_documents docsList
{ Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk) let chunks = Split.chunksOf 100 totalDocs
, Jobs._ws_total = length totalDocs forM_ (zip [1..] chunks) $ \(local_ix, chunk) -> do
} let ws = Jobs.WorkSplit
let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws { Jobs._ws_current = min (length totalDocs) (((local_ix - 1) * length chunk) + length chunk)
void $ sendJob $ Jobs.ImportRemoteDocuments payload , Jobs._ws_total = length totalDocs
for_ mb_terms $ \ngramsList -> do }
$(logLocM) INFO $ "Found ngrams list to import..." let payload = Jobs.ImportRemoteDocumentsPayload loggedInUser parentId new_node chunk ws
void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList void $ sendJob $ Jobs.ImportRemoteDocuments payload
ENP_terms ngramsList
-> do
$(logLocM) INFO $ "Found ngrams list to import..."
void $ sendJob $ Jobs.ImportRemoteTerms $ Jobs.ImportRemoteTermsPayload new_node ngramsList
ENP_notes _noteAsMarkdown
-> do
$(logLocM) INFO $ "Found some markdown notes to import"
-- FIXME(adn) actually import the notes
pure new_node pure new_node
insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId] insertTrees :: Maybe NodeId -> [NodeId] -> Tree ExportableNode -> m [NodeId]
...@@ -152,26 +172,42 @@ remoteExportHandler loggedInUser Named.RemoteExportRequest{..} = do ...@@ -152,26 +172,42 @@ remoteExportHandler loggedInUser Named.RemoteExportRequest{..} = do
liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder exportable)) (mkClientEnv mgr _rer_instance_url) streamDecode) liftIO (withClientM (remoteImportClient _rer_instance_auth (streamEncoder exportable)) (mkClientEnv mgr _rer_instance_url) streamDecode)
`Safe.catch` \(e :: BackendInternalError) -> throwError $ _BackendInternalError # e `Safe.catch` \(e :: BackendInternalError) -> throwError $ _BackendInternalError # e
makeExportable :: IsGargServer err env m makeExportable :: (MonadIO m, IsGargServer err env m)
=> NodeId => NodeId
-> Tree (Node JSON.Value) -> Tree (Node JSON.Value)
-> m (Tree ExportableNode) -> m (Tree ExportableNode)
makeExportable userNodeId (TreeN x xs) makeExportable userNodeId (TreeN x xs)
| Just nty <- lookupDBid (_node_typename x) | Just nty <- lookupDBid (_node_typename x)
= do = do
mb_docs <- case nty of mb_payload <- case nty of
NodeTexts -> Just <$> get_document_json userNodeId (_node_id x) NodeTexts -> Just . ENP_document <$> get_document_json userNodeId (_node_id x)
NodeList -> Just . ENP_terms <$> getNgramsList (_node_id x)
Notes -> case JS.parseMaybe JS.parseJSON (_node_hyperdata x) of
Nothing
-> pure Nothing
Just hframe
-> do
mgr <- view gargHttpManager
tryExportNode mgr hframe
_ -> pure Nothing _ -> pure Nothing
mb_ngrams <- case nty of let exportableRoot = ExportableNode x mb_payload
NodeList -> Just <$> getNgramsList (_node_id x)
_ -> pure Nothing
let exportableRoot = ExportableNode x mb_docs mb_ngrams
children <- mapM (makeExportable userNodeId) xs children <- mapM (makeExportable userNodeId) xs
pure $ TreeN exportableRoot children pure $ TreeN exportableRoot children
| otherwise | otherwise
= throwError $ _BackendInternalError = throwError $ _BackendInternalError
# InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.") # InternalUnexpectedError (toException $ userError $ "remoteImportHandler: impossible, node with invalid type.")
tryExportNode :: (MonadIO m, MonadLogger m) => HTTP.Manager -> HyperdataFrame -> m (Maybe ExportableNodePayload)
tryExportNode mgr HyperdataFrame{..} = do
let download_url = _hf_base <> "/" <> _hf_frame_id <> "/download"
case HTTP.parseRequest (T.unpack download_url) of
Left err -> do
$(logLocM) WARNING $ "Couldn't extract a valid URL from " <> download_url <> ", " <> T.pack (show err)
pure Nothing
Right rq -> do
res <- HTTP.responseBody <$> liftIO (HTTP.httpLbs rq mgr)
pure $ Just $ ENP_notes (TE.decodeUtf8 $ BL.toStrict $ res)
checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m () checkNodesTypeAllowed :: (MonadError e m, HasNodeError e) => Tree (Node a) -> m ()
checkNodesTypeAllowed (TreeN r xs) = do checkNodesTypeAllowed (TreeN r xs) = do
checkNodeTypeAllowed r checkNodeTypeAllowed r
...@@ -188,7 +224,7 @@ checkNodeTypeAllowed n ...@@ -188,7 +224,7 @@ checkNodeTypeAllowed n
-- | At the moment we support only export corpus nodes and their children (i.e. "Docs", "Terms", "Graph"). -- | At the moment we support only export corpus nodes and their children (i.e. "Docs", "Terms", "Graph").
exportableNodeTypes :: [NodeType] exportableNodeTypes :: [NodeType]
exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo ] exportableNodeTypes = [ NodeCorpus, NodeCorpusV3, NodeTexts, NodeGraph, NodeList, NodePhylo, Notes ]
streamEncoder :: MonadIO m => ExpectedPayload -> ConduitT () Named.RemoteBinaryData m () streamEncoder :: MonadIO m => ExpectedPayload -> ConduitT () Named.RemoteBinaryData m ()
streamEncoder = CL.sourceList . map Named.RemoteBinaryData . BL.toChunks . serialise streamEncoder = CL.sourceList . map Named.RemoteBinaryData . BL.toChunks . serialise
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment