Commit 48da15a3 authored by Fabien Maniere

Merge branch '362-dev-sqlite-performance-improvement' into 'dev'

[sqlite] improve performance of sqlite generation by proper PRAGMA

See merge request !431
parents c0b775e0 219cb31f
Pipeline #7771 failed with stages in 39 minutes and 42 seconds
...@@ -28,11 +28,12 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) ) ...@@ -28,11 +28,12 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) )
import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData) import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (IsGargServer) import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Named.Corpus qualified as Named
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) ) import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ListId, context2node, nodeId2ContextId)
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node ( defaultList ) import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
...@@ -41,7 +42,6 @@ import Gargantext.Prelude hiding (hash) ...@@ -41,7 +42,6 @@ import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash)
import Servant (Headers, Header, addHeader) import Servant (Headers, Header, addHeader)
import Servant.Server.Generic (AsServerT) import Servant.Server.Generic (AsServerT)
import qualified Gargantext.API.Routes.Named.Corpus as Named
-------------------------------------------------- --------------------------------------------------
-- | Hashes are ordered by Set -- | Hashes are ordered by Set
......
...@@ -87,3 +87,4 @@ data CorpusSQLiteData = ...@@ -87,3 +87,4 @@ data CorpusSQLiteData =
, _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm) , _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm) , _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm)
} deriving (Show, Eq, Generic) } deriving (Show, Eq, Generic)
...@@ -141,7 +141,9 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa ...@@ -141,7 +141,9 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa
S.withConnection fpath $ \conn -> do S.withConnection fpath $ \conn -> do
-- better performance -- better performance
-- https://kerkour.com/sqlite-for-servers -- https://kerkour.com/sqlite-for-servers
S.execute_ conn "PRAGMA journal_mode = WAL" S.execute_ conn "PRAGMA journal_mode = WAL" -- overall, a good idea for sqlite
S.execute_ conn "PRAGMA synchronous = NORMAL" -- faster writes
S.execute_ conn "PRAGMA cache_size = 1000000" -- better memory usage
S.execute_ conn "CREATE TABLE info (key, value);" S.execute_ conn "CREATE TABLE info (key, value);"
S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version) S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version)
...@@ -179,6 +181,10 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa ...@@ -179,6 +181,10 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa
, iso8601Show ctxDate , iso8601Show ctxDate
, Aeson.encode ctxHyperdata )) <$> _csd_contexts) , Aeson.encode ctxHyperdata )) <$> _csd_contexts)
-- Force a WAL checkpoint so that no data is left behind in the WAL file
-- (we only send the main sqlite file; WAL mode keeps separate `-wal` and `-shm` files next to it, which would not be sent)
S.execute_ conn "PRAGMA wal_checkpoint(FULL)"
bsl <- BSL.readFile fpath bsl <- BSL.readFile fpath
pure $ CorpusSQLite { _cs_bs = bsl } pure $ CorpusSQLite { _cs_bs = bsl }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment