[sqlite] implement CorpuSQLiteData

With this data structure, we can tell what goes into SQLite and what
can (in the future implementation) be read from it.
parent 84ee557d
Pipeline #7372 passed with stages
in 60 minutes and 36 seconds
......@@ -34,7 +34,7 @@ data NgramsTree = NgramsTree { mt_label :: Text
, mt_value :: Double
, mt_children :: [NgramsTree]
}
deriving (Generic, Show)
deriving (Generic, Show, Eq)
toNgramsTree :: Tree (NgramsTerm,Double) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = NgramsTree l v (map toNgramsTree xs)
......
......@@ -24,7 +24,7 @@ import Data.Text (pack)
import Gargantext.API.Ngrams.Tools (getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) )
import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite)
import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (IsGargServer)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
......@@ -92,7 +92,8 @@ getCorpusSQLite :: (CES.MonadMask m, IsGargServer env err m)
-> Maybe ListId
-> m (Headers '[Header "Content-Disposition" Text] CorpusSQLite)
getCorpusSQLite cId lId = do
corpusSQLite <- mkCorpusSQLite cId lId
corpusSQLiteData <- mkCorpusSQLiteData cId lId
corpusSQLite <- liftBase $ mkCorpusSQLite corpusSQLiteData
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".sqlite")
$ corpusSQLite
......
......@@ -15,10 +15,18 @@ module Gargantext.API.Node.Corpus.Export.Types where
import Data.ByteString.Lazy qualified as BSL
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..), NamedSchema(..), binarySchema )
import Data.Time.Clock (UTCTime)
import Data.Version (Version)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.Core.Types ( TODO )
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core.Types ( CorpusId, ListId, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.List (HyperdataList)
import Gargantext.Database.Admin.Types.Node (ContextId, NodeId)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash qualified as H
import Servant (Accept(..), MimeRender(mimeRender), MimeUnrender(mimeUnrender), OctetStream)
......@@ -44,7 +52,7 @@ $(deriveJSON (unPrefix "_c_") ''Corpus)
-- corpus
newtype CorpusSQLite =
CorpusSQLite { _cs_bs :: BSL.ByteString }
deriving (Generic)
deriving (Generic, NFData)
instance Accept CorpusSQLite where
contentType _ = contentType (Proxy :: Proxy OctetStream)
instance MimeRender OctetStream CorpusSQLite where
......@@ -55,3 +63,27 @@ instance MimeUnrender OctetStream CorpusSQLite where
instance ToSchema CorpusSQLite where
declareNamedSchema _ = pure $ NamedSchema (Just "CorpusSQLite") binarySchema
-- | Contents of the SQLite export DB
-- (having such datatype makes it easier to coherently implement import/export)
data CorpusSQLiteData =
CorpusSQLiteData { _csd_version :: Version
, _csd_cId :: CorpusId
, _csd_lId :: ListId
, _csd_created :: UTCTime
, _csd_corpus_name :: Text
, _csd_corpus_hash :: Maybe H.Hash
, _csd_corpus_parent :: Maybe NodeId
, _csd_corpus_hyperdata :: HyperdataCorpus
, _csd_list_name :: Text
, _csd_list_parent :: Maybe NodeId
, _csd_list_hyperdata :: HyperdataList
, _csd_contexts :: [(NodeId, Text, UTCTime, HyperdataDocument)]
, _csd_map_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm)
} deriving (Show, Eq, Generic)
......@@ -19,11 +19,13 @@ import Data.ByteString.Lazy qualified as BSL
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Time.Clock (getCurrentTime)
import Data.Time.Format.ISO8601 (iso8601Show)
import Data.Version (showVersion)
import Database.SQLite.Simple qualified as S
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..))
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..), CorpusSQLiteData(..))
import Gargantext.Core.NodeStory.Types ( HasNodeStoryEnv, NodeListStory )
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types (CorpusId, ListId, NodeType(NodeList))
......@@ -68,16 +70,16 @@ getContextNgrams cId lId listType nt repo = do
pure r
mkCorpusSQLite :: ( CES.MonadMask m
mkCorpusSQLiteData :: ( CES.MonadMask m
, HasNodeStoryEnv env
, HasNodeError err
, IsDBCmd env err m )
=> CorpusId
-> Maybe ListId
-> m CorpusSQLite
mkCorpusSQLite cId lId = withTempSQLiteDir $ \(fp, _fname, fpath) -> do
-> m CorpusSQLiteData
mkCorpusSQLiteData cId lId = do
corpus <- getNodeWith cId (Proxy @HyperdataCorpus)
now <- liftBase getCurrentTime
listId <- case lId of
Nothing -> defaultList cId
......@@ -92,41 +94,81 @@ mkCorpusSQLite cId lId = withTempSQLiteDir $ \(fp, _fname, fpath) -> do
candidateNgs <- getContextNgrams cId listId CandidateTerm nt repo
docs <- selectDocNodes cId
liftBase $ putText $ "[mkCorpusSQLite] listId: " <> show listId
liftBase $ putText $ "[mkCorpusSQLite] fp: " <> show fp
pure $ CorpusSQLiteData { _csd_version = PG.version
, _csd_cId = cId
, _csd_lId = listId
, _csd_created = now
, _csd_corpus_name = corpus ^. node_name
, _csd_corpus_hash = corpus ^. node_hash_id
, _csd_corpus_parent = corpus ^. node_parent_id
, _csd_corpus_hyperdata = corpus ^. node_hyperdata
, _csd_list_name = l ^. node_name
, _csd_list_parent = l ^. node_parent_id
, _csd_list_hyperdata = l ^. node_hyperdata
, _csd_contexts = (\c -> ( c ^. context_id
, c ^. context_name
, c ^. context_date
, c ^. context_hyperdata)) <$> docs
, _csd_map_context_ngrams = mapNgs
, _csd_stop_context_ngrams = stopNgs
, _csd_candidate_context_ngrams = candidateNgs
}
mkCorpusSQLite :: ( CES.MonadMask m
, MonadBase IO m )
=> CorpusSQLiteData
-> m CorpusSQLite
mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpath) -> liftBase $ do
putText $ "[mkCorpusSQLite] listId: " <> show _csd_lId
putText $ "[mkCorpusSQLite] fp: " <> show fp
liftBase $ S.withConnection fpath $ \conn -> do
S.withConnection fpath $ \conn -> do
-- better performance
-- https://kerkour.com/sqlite-for-servers
S.execute_ conn "PRAGMA journal_mode = WAL"
S.execute_ conn "CREATE TABLE info (key, value);"
S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion PG.version)
S.execute conn "INSERT INTO info (key, value) VALUES ('corpusId', ?)" (S.Only $ unNodeId cId)
S.execute conn "INSERT INTO info (key, value) VALUES ('listId', ?)" (S.Only $ unNodeId listId)
S.execute_ conn "INSERT INTO info (key, value) VALUES ('created', datetime())"
S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version)
S.execute conn "INSERT INTO info (key, value) VALUES ('corpusId', ?)" (S.Only $ unNodeId _csd_cId)
S.execute conn "INSERT INTO info (key, value) VALUES ('listId', ?)" (S.Only $ unNodeId _csd_lId)
S.execute conn "INSERT INTO info (key, value) VALUES ('created', datetime(?))" (S.Only $ iso8601Show _csd_created)
S.execute_ conn "CREATE TABLE corpus (id, name, hash, parent_id, hyperdata)"
S.execute conn "INSERT INTO corpus (id, name, hash, parent_id, hyperdata) VALUES (?, ?, ?, ?, ?)"
(unNodeId cId, corpus ^. node_name, corpus ^. node_hash_id, unNodeId <$> (corpus ^. node_parent_id), Aeson.encode (corpus ^. node_hyperdata))
( unNodeId _csd_cId
, _csd_corpus_name
, _csd_corpus_hash
, unNodeId <$> _csd_corpus_parent
, Aeson.encode _csd_corpus_hyperdata )
S.execute_ conn "CREATE TABLE lists (id, name, parent_id, hyperdata)"
S.execute conn "INSERT INTO lists (id, name, parent_id, hyperdata) VALUES (?, ?, ?, ?)"
(unNodeId listId, l ^. node_name, unNodeId <$> (l ^. node_parent_id), Aeson.encode (l ^. node_hyperdata))
( unNodeId _csd_lId
, _csd_list_name
, unNodeId <$> _csd_list_parent
, Aeson.encode _csd_list_hyperdata )
S.execute_ conn "CREATE TABLE ngrams (context_id, terms, type_)"
let insertTerms ngs type_ = do
let ngs' = concatMap (\(ctxId, ngrams) -> (\n -> (_ContextId ctxId, unNgramsTerm n, type_)) <$> Set.toList ngrams) (Map.toList ngs)
S.executeMany conn "INSERT INTO ngrams (context_id, terms, type_) VALUES (?, ?, ?)" ngs'
insertTerms mapNgs ("map" :: Text)
insertTerms stopNgs ("stop" :: Text)
insertTerms candidateNgs ("candidate" :: Text)
insertTerms _csd_map_context_ngrams ("map" :: Text)
insertTerms _csd_stop_context_ngrams ("stop" :: Text)
insertTerms _csd_candidate_context_ngrams ("candidate" :: Text)
S.execute_ conn "CREATE TABLE documents (context_id, name, date, hyperdata)"
S.executeMany conn "INSERT INTO documents (context_id, name, date, hyperdata) VALUES (?, ?, ?, ?)"
((\c -> (unNodeId (c ^. context_id), c ^. context_name, c ^. context_date, Aeson.encode (c ^. context_hyperdata))) <$> docs)
S.executeMany conn "INSERT INTO documents (context_id, name, date, hyperdata) VALUES (?, ?, date(?), ?)"
((\(ctxId, ctxName, ctxDate, ctxHyperdata) -> ( unNodeId ctxId
, ctxName
, iso8601Show ctxDate
, Aeson.encode ctxHyperdata )) <$> _csd_contexts)
bsl <- liftBase $ BSL.readFile fpath
bsl <- BSL.readFile fpath
pure $ CorpusSQLite bsl
......
......@@ -22,7 +22,7 @@ data Chart = ChartHisto | ChartScatter | ChartPie
data Histo = Histo { histo_dates :: !(Vector Text)
, histo_count :: !(Vector Int)
}
deriving (Show, Generic)
deriving (Show, Generic, Eq)
instance ToSchema Histo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "histo_")
......
......@@ -28,7 +28,7 @@ data HyperdataCorpus =
-- 'defaultLanguage' if we don't know which language it is.
, _hc_lang :: Maybe Lang
}
deriving (Generic, Show)
deriving (Generic, Eq, Show)
defaultHyperdataCorpus :: HyperdataCorpus
defaultHyperdataCorpus =
......
......@@ -34,7 +34,7 @@ data CorpusField = MarkdownField { _cf_text :: !Text }
, _cf_authors :: !Text
-- , _cf_resources :: ![Resource]
}
deriving (Show, Generic)
deriving (Show, Generic, Eq)
defaultCorpusField :: CorpusField
defaultCorpusField = MarkdownField "# Title"
......@@ -56,7 +56,7 @@ data HyperdataField a =
HyperdataField { _hf_type :: !CodeType
, _hf_name :: !Text
, _hf_data :: !a
} deriving (Generic, Show)
} deriving (Generic, Show, Eq)
defaultHyperdataField :: HyperdataField CorpusField
defaultHyperdataField = HyperdataField Markdown "name" defaultCorpusField
......
......@@ -34,7 +34,7 @@ data HyperdataList =
, _hl_pie :: !(HashMap TabType (ChartMetrics Histo))
, _hl_scatter :: !(HashMap TabType Metrics)
, _hl_tree :: !(HashMap TabType (ChartMetrics (Vector NgramsTree)))
} deriving (Show, Generic)
} deriving (Show, Generic, Eq)
-- HyperdataList { _hl_chart :: !(Maybe (ChartMetrics Histo))
-- , _hl_list :: !(Maybe Text)
-- , _hl_pie :: !(Maybe (ChartMetrics Histo))
......
......@@ -25,7 +25,7 @@ import Test.QuickCheck.Arbitrary
newtype Metrics = Metrics
{ metrics_data :: Vector Metric}
deriving (Generic, Show)
deriving (Generic, Show, Eq)
instance ToSchema Metrics where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "metrics_")
......@@ -38,7 +38,7 @@ data Metric = Metric
, m_x :: !Double
, m_y :: !Double
, m_cat :: !ListType
} deriving (Generic, Show)
} deriving (Generic, Show, Eq)
instance ToSchema Metric where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "m_")
......@@ -54,7 +54,7 @@ deriveJSON (unPrefix "metrics_") ''Metrics
newtype ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
deriving (Generic, Show)
deriving (Generic, Show, Eq)
instance (Typeable a, ToSchema a) => ToSchema (ChartMetrics a) where
declareNamedSchema = wellNamedSchema "chartMetrics_"
......
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Test.API.Export (tests) where
import Control.Lens (view)
import Data.ByteString.Lazy qualified as BSL
import Data.Version (showVersion)
import Database.SQLite.Simple qualified as S
-- import Fmt (build)
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..))
import Gargantext.API.Node.Corpus.Export.Utils (withTempSQLiteDir)
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..), CorpusSQLiteData(..))
import Gargantext.API.Node.Corpus.Export.Utils (withTempSQLiteDir, mkCorpusSQLiteData)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Terms (TermType(Multi))
import Gargantext.Core.Types (unNodeId)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node (NodeType(NodeFolder, NodeCorpus, NodeFolderPrivate))
import Gargantext.Database.Query.Table.Node (getOrMkList, getNodeWith, insertDefaultNode, insertNode)
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (get)
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant.API.ResponseHeaders (Headers(getResponse))
import Servant.Auth.Client ()
import Servant.Client
import Servant.Client.Streaming (runClientM)
import Test.API.Prelude (checkEither)
import Test.API.Routes (get_corpus_sqlite_export)
import Test.API.Setup (withTestDBAndPort, dbEnvSetup, SpecContext (..))
import Test.API.UpdateList (createFortranDocsList)
import Test.Database.Operations.DocumentSearch (exampleDocument_01)
import Test.Database.Types (runTestMonad)
import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication)
import Test.Utils (withValidLogin)
......@@ -27,6 +41,31 @@ import Test.Utils (withValidLogin)
tests :: Spec
tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
describe "Export API" $ do
describe "Check CorpusSQLiteData creation" $ do
it "correctly creates CorpusSQLiteData" $ \ctx -> do
flip runReaderT (_sctx_env ctx) $ runTestMonad $ do
aliceUserId <- getUserId (UserName "alice")
aliceRootId <- getRootId (UserName "alice")
alicePrivateFolderId <- insertNode NodeFolderPrivate (Just "NodeFolderPrivate") Nothing aliceRootId aliceUserId
aliceFolderId <- insertDefaultNode NodeFolder alicePrivateFolderId aliceUserId
corpusId <- insertDefaultNode NodeCorpus aliceFolderId aliceUserId
aliceListId <- getOrMkList corpusId aliceUserId
corpus <- getNodeWith corpusId (Proxy @HyperdataCorpus)
let docs = [ exampleDocument_01 ]
let lang = EN
nlpServer <- view (nlpServerGet lang)
_ <- addDocumentsToHyperCorpus nlpServer (Just $ corpus ^. node_hyperdata) (Multi lang) corpusId docs
(CorpusSQLiteData { .. }) <- mkCorpusSQLiteData corpusId Nothing
liftIO $ do
_csd_version `shouldBe` PG.version
_csd_cId `shouldBe` corpusId
_csd_lId `shouldBe` aliceListId
length _csd_contexts `shouldBe` 1
describe "GET /api/v1.0/corpus/cId/sqlite" $ do
it "returns correct SQLite db" $ \ctx -> do
let port = _sctx_port ctx
......
......@@ -20,7 +20,7 @@ import Gargantext.Core.Types (NodeId, NodeType(..))
import Gargantext.Core.Worker.Env () -- instance HasNodeError
import Gargantext.Database.Action.User
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node (insertNode, mk, getUserRootPublicNode, getUserRootPrivateNode)
import Gargantext.Database.Query.Table.Node.User (getUserByName)
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Node (_node_id)
......
......@@ -8,9 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TupleSections #-}
module Test.Database.Operations.PublishNode where
import Prelude
......
......@@ -70,6 +70,9 @@ newtype TestMonad a = TestMonad { runTestMonad :: ReaderT TestEnv IO a }
, MonadBaseControl IO
, MonadFail
, MonadIO
, MonadMask
, MonadCatch
, MonadThrow
)
data TestJobHandle = TestNoJobHandle
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment