Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
2e22eac8
Verified
Commit
2e22eac8
authored
Sep 10, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLI] db fix command, to fix hyperdata #630
Dry run mode enabled by default
parent
e3a333b8
Pipeline
#7874
passed with stages
in 49 minutes and 21 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
194 additions
and
5 deletions
+194
-5
DBFixes.hs
bin/gargantext-cli/CLI/DBFixes.hs
+177
-0
Types.hs
bin/gargantext-cli/CLI/Types.hs
+7
-0
Main.hs
bin/gargantext-cli/Main.hs
+5
-2
gargantext.cabal
gargantext.cabal
+3
-1
Document.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
+2
-2
No files found.
bin/gargantext-cli/CLI/DBFixes.hs
0 → 100644
View file @
2e22eac8
{-|
Module : CLI.DBFixes
Description : Gargantext CLI DB fixes
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Fixes to the GarganText DB
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
CLI.DBFixes
where
import
CLI.Parsers
(
settings_p
)
import
CLI.Types
import
Control.Lens
qualified
as
Lens
import
Data.Aeson
qualified
as
Aeson
import
Data.Map.Strict
qualified
as
Map
import
Data.Text
qualified
as
T
import
Database.PostgreSQL.Simple
qualified
as
PSQL
import
Database.PostgreSQL.Simple.FromField
qualified
as
PSQL
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Core.Config
(
gc_database_config
,
gc_logging
)
import
Gargantext.Core.Config.Types
(
_SettingsFile
)
import
Gargantext.Core.Config.Utils
(
readConfig
)
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
),
hd_institutes_tree
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
(
fieldLabelModifier
,
omitNothingFields
,
sumEncoding
,
SumEncoding
(
..
),
genericParseJSON
,
genericToJSON
,
defaultOptions
,
fromField'
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Database
(
runPGSExecute
,
runPGSQuery
)
import
Gargantext.System.Logging
(
LogLevel
(
..
),
logLoc
,
withLogger
)
import
Options.Applicative
fixBrokenHyperdata630Cmd
::
HasCallStack
=>
Mod
CommandFields
CLI
fixBrokenHyperdata630Cmd
=
command
"fix-broken-hyperdata-630"
(
info
(
helper
<*>
(
fmap
CLISub
$
fmap
CCMD_db_fix_630
dbFixParser
))
(
progDesc
"Fix broken hyperdata, issue #630"
))
dbFixParser
::
Parser
DBFixArgs
dbFixParser
=
DBFixArgs
<$>
settings_p
<*>
flag
True
False
(
long
"no-dry-run"
<>
help
"Whether to dry run the DB fix (enabled by default)"
)
-- | Fixes issue https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/630
-- Basically, because of Haskell HyperdataDocument type change in 2024, there remain
-- documents in `contexts` table, where the `hyperdata->institutes_tree` is a map
-- from `Text` to `Text`, instead being a map from `Text` to `[Text]` (see
-- G.D.A.T.Hyperdata.Document).
-- We make a SQL query to find hyperdata containing `Text` in value,
-- parse these rows with Haskell to make sure this is indeed broken,
-- then type-safely conver to good values and update the row.
fixBrokenHyperdata630CLI
::
DBFixArgs
->
IO
()
fixBrokenHyperdata630CLI
(
DBFixArgs
{
settings_toml
,
dry_run
})
=
do
cfg
<-
liftIO
$
readConfig
settings_toml
withLogger
(
cfg
^.
gc_logging
)
$
\
logger
->
do
$
(
logLoc
)
logger
INFO
$
"settings file: "
<>
T
.
pack
(
_SettingsFile
settings_toml
)
let
dbConfig
=
cfg
^.
gc_database_config
$
(
logLoc
)
logger
INFO
$
"DB config: "
<>
show
dbConfig
c
<-
PSQL
.
connect
dbConfig
PSQL
.
begin
c
affectedContexts
<-
runPGSQuery
c
affectedDocsQ
()
::
IO
[(
Int
,
Hyperdata630Incorrect
)]
mapM_
(
\
(
cId
,
cHyperdata
)
->
do
putText
$
"["
<>
show
cId
<>
"] "
<>
show
(
cHyperdata
^.
Lens
.
to
_hd630i_institutes_tree
)
let
fixedHyperdata
=
fix630Hyperdata
cHyperdata
putText
$
" -> "
<>
show
(
fixedHyperdata
^.
hd_institutes_tree
)
_
<-
runPGSExecute
c
updateAffectedDoc
(
Aeson
.
encode
$
fixedHyperdata
^.
hd_institutes_tree
,
cId
)
-- Check that the hyperdata can be fetched correctly
newHd
<-
runPGSQuery
c
[
sql
|
SELECT hyperdata FROM contexts WHERE id = ?
|]
(
PSQL
.
Only
cId
)
::
IO
[
PSQL
.
Only
HyperdataDocument
]
unless
(
length
newHd
==
1
)
$
do
panicTrace
$
"["
<>
show
cId
<>
"] Error! Can't get new hyperdata: "
<>
show
newHd
putText
$
"["
<>
show
cId
<>
"] updated correctly"
)
affectedContexts
if
dry_run
then
do
$
(
logLoc
)
logger
INFO
$
"dry run mode, rolling back changes"
PSQL
.
rollback
c
else
do
$
(
logLoc
)
logger
INFO
$
"no dry run mode, comitting changes"
PSQL
.
commit
c
where
-- | SQL query to find all affected documents:
affectedDocsQ
=
[
sql
|
SELECT
c.id, c.hyperdata
FROM
contexts AS c
WHERE
-- does the hyperdata contain values with strings, instead of arrays?
EXISTS (
SELECT 1
FROM jsonb_each(c.hyperdata->'institutes_tree') AS kv(key, value) -- expand the top‑level map
WHERE
c.hyperdata->'institutes_tree' IS NOT NULL
AND jsonb_typeof(kv.value) = 'string' -- a plain string found
);
|]
updateAffectedDoc
=
[
sql
|
UPDATE contexts
SET hyperdata = jsonb_set(hyperdata, '{institutes_tree}', ?::jsonb)
WHERE id = ?
|]
data
Hyperdata630Incorrect
=
Hyperdata630Incorrect
{
_hd630i_bdd
::
!
(
Maybe
Text
)
,
_hd630i_doi
::
!
(
Maybe
Text
)
,
_hd630i_url
::
!
(
Maybe
Text
)
,
_hd630i_page
::
!
(
Maybe
Int
)
,
_hd630i_title
::
!
(
Maybe
Text
)
,
_hd630i_authors
::
!
(
Maybe
Text
)
,
_hd630i_institutes
::
!
(
Maybe
Text
)
,
_hd630i_source
::
!
(
Maybe
Text
)
,
_hd630i_abstract
::
!
(
Maybe
Text
)
,
_hd630i_publication_date
::
!
(
Maybe
Text
)
,
_hd630i_publication_year
::
!
(
Maybe
Int
)
,
_hd630i_publication_month
::
!
(
Maybe
Int
)
,
_hd630i_publication_day
::
!
(
Maybe
Int
)
,
_hd630i_publication_hour
::
!
(
Maybe
Int
)
,
_hd630i_publication_minute
::
!
(
Maybe
Int
)
,
_hd630i_publication_second
::
!
(
Maybe
Int
)
,
_hd630i_language_iso2
::
!
(
Maybe
Text
)
,
_hd630i_institutes_tree
::
!
(
Maybe
(
Map
Text
Text
))
}
deriving
(
Show
,
Generic
)
instance
FromJSON
Hyperdata630Incorrect
where
parseJSON
=
genericParseJSON
(
defaultOptions
{
sumEncoding
=
ObjectWithSingleField
,
fieldLabelModifier
=
unCapitalize
.
dropPrefix
"_hd630i_"
,
omitNothingFields
=
True
}
)
instance
ToJSON
Hyperdata630Incorrect
where
toJSON
=
genericToJSON
(
defaultOptions
{
sumEncoding
=
ObjectWithSingleField
,
fieldLabelModifier
=
unCapitalize
.
dropPrefix
"_hd630i_"
,
omitNothingFields
=
True
}
)
instance
PSQL
.
FromField
Hyperdata630Incorrect
where
fromField
=
fromField'
fix630Hyperdata
::
Hyperdata630Incorrect
->
HyperdataDocument
fix630Hyperdata
Hyperdata630Incorrect
{
..
}
=
HyperdataDocument
{
_hd_bdd
=
_hd630i_bdd
,
_hd_doi
=
_hd630i_doi
,
_hd_url
=
_hd630i_url
,
_hd_page
=
_hd630i_page
,
_hd_title
=
_hd630i_title
,
_hd_authors
=
_hd630i_authors
,
_hd_institutes
=
_hd630i_institutes
,
_hd_source
=
_hd630i_source
,
_hd_abstract
=
_hd630i_abstract
,
_hd_publication_date
=
_hd630i_publication_date
,
_hd_publication_year
=
_hd630i_publication_year
,
_hd_publication_month
=
_hd630i_publication_month
,
_hd_publication_day
=
_hd630i_publication_day
,
_hd_publication_hour
=
_hd630i_publication_hour
,
_hd_publication_minute
=
_hd630i_publication_minute
,
_hd_publication_second
=
_hd630i_publication_second
,
_hd_language_iso2
=
_hd630i_language_iso2
,
_hd_institutes_tree
}
where
_hd_institutes_tree
=
Map
.
map
(
\
v
->
[
v
])
<$>
_hd630i_institutes_tree
bin/gargantext-cli/CLI/Types.hs
View file @
2e22eac8
...
@@ -129,6 +129,12 @@ data WorkerStatsArgs = WorkerStatsArgs
...
@@ -129,6 +129,12 @@ data WorkerStatsArgs = WorkerStatsArgs
{
ws_toml
::
!
SettingsFile
{
ws_toml
::
!
SettingsFile
}
deriving
(
Show
,
Eq
)
}
deriving
(
Show
,
Eq
)
-- arguments when DB fixing is needed
data
DBFixArgs
=
DBFixArgs
{
settings_toml
::
!
SettingsFile
,
dry_run
::
!
Bool
}
deriving
(
Show
,
Eq
)
data
CLICmd
data
CLICmd
=
CCMD_admin
!
AdminArgs
=
CCMD_admin
!
AdminArgs
|
CCMD_clean_csv_corpus
|
CCMD_clean_csv_corpus
...
@@ -145,6 +151,7 @@ data CLICmd
...
@@ -145,6 +151,7 @@ data CLICmd
|
CCMD_server
!
CLIServer
|
CCMD_server
!
CLIServer
|
CCMD_upgrade
!
UpgradeArgs
|
CCMD_upgrade
!
UpgradeArgs
|
CCMD_worker
!
CLIWorker
|
CCMD_worker
!
CLIWorker
|
CCMD_db_fix_630
DBFixArgs
deriving
(
Show
,
Eq
)
deriving
(
Show
,
Eq
)
data
CLI
=
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
2e22eac8
...
@@ -13,12 +13,12 @@ Main specifications to index a corpus with a term list
...
@@ -13,12 +13,12 @@ Main specifications to index a corpus with a term list
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeApplications #-}
module
Main
where
module
Main
where
import
CLI.Admin
(
adminCLI
,
adminCmd
)
import
CLI.Admin
(
adminCLI
,
adminCmd
)
import
CLI.DBFixes
(
fixBrokenHyperdata630Cmd
,
fixBrokenHyperdata630CLI
)
import
CLI.FileDiff
(
fileDiffCLI
,
fileDiffCmd
)
import
CLI.FileDiff
(
fileDiffCLI
,
fileDiffCmd
)
import
CLI.FilterTermsAndCooc
import
CLI.FilterTermsAndCooc
import
CLI.Import
(
importCLI
,
importCmd
)
import
CLI.Import
(
importCLI
,
importCmd
)
...
@@ -69,6 +69,8 @@ runCLI = \case
...
@@ -69,6 +69,8 @@ runCLI = \case
->
serverCLI
args
->
serverCLI
args
CLISub
(
CCMD_worker
args
)
CLISub
(
CCMD_worker
args
)
->
workerCLI
args
->
workerCLI
args
CLISub
(
CCMD_db_fix_630
args
)
->
fixBrokenHyperdata630CLI
args
main
::
IO
()
main
::
IO
()
...
@@ -94,5 +96,6 @@ allOptions = subparser (
...
@@ -94,5 +96,6 @@ allOptions = subparser (
fileDiffCmd
<>
fileDiffCmd
<>
routesCmd
<>
routesCmd
<>
serverCmd
<>
serverCmd
<>
workerCmd
workerCmd
<>
fixBrokenHyperdata630Cmd
)
)
gargantext.cabal
View file @
2e22eac8
...
@@ -292,6 +292,7 @@ library
...
@@ -292,6 +292,7 @@ library
Gargantext.Database.Admin.Types.Hyperdata.Corpus
Gargantext.Database.Admin.Types.Hyperdata.Corpus
Gargantext.Database.Admin.Types.Hyperdata.Document
Gargantext.Database.Admin.Types.Hyperdata.Document
Gargantext.Database.Admin.Types.Hyperdata.Folder
Gargantext.Database.Admin.Types.Hyperdata.Folder
Gargantext.Database.Admin.Types.Hyperdata.Prelude
Gargantext.Database.Admin.Types.Node
Gargantext.Database.Admin.Types.Node
Gargantext.Database.Class
Gargantext.Database.Class
Gargantext.Database.Prelude
Gargantext.Database.Prelude
...
@@ -461,7 +462,6 @@ library
...
@@ -461,7 +462,6 @@ library
Gargantext.Database.Admin.Types.Hyperdata.List
Gargantext.Database.Admin.Types.Hyperdata.List
Gargantext.Database.Admin.Types.Hyperdata.Model
Gargantext.Database.Admin.Types.Hyperdata.Model
Gargantext.Database.Admin.Types.Hyperdata.Phylo
Gargantext.Database.Admin.Types.Hyperdata.Phylo
Gargantext.Database.Admin.Types.Hyperdata.Prelude
Gargantext.Database.Admin.Types.Hyperdata.Texts
Gargantext.Database.Admin.Types.Hyperdata.Texts
Gargantext.Database.Admin.Types.Hyperdata.User
Gargantext.Database.Admin.Types.Hyperdata.User
Gargantext.Database.Admin.Types.Metrics
Gargantext.Database.Admin.Types.Metrics
...
@@ -673,6 +673,7 @@ executable gargantext
...
@@ -673,6 +673,7 @@ executable gargantext
main-is: Main.hs
main-is: Main.hs
other-modules:
other-modules:
CLI.Admin
CLI.Admin
CLI.DBFixes
CLI.FileDiff
CLI.FileDiff
CLI.FilterTermsAndCooc
CLI.FilterTermsAndCooc
CLI.Import
CLI.Import
...
@@ -705,6 +706,7 @@ executable gargantext
...
@@ -705,6 +706,7 @@ executable gargantext
, gargantext
, gargantext
, gargantext-prelude
, gargantext-prelude
, haskell-bee
, haskell-bee
, lens >= 5.2.2 && < 5.3
, MonadRandom ^>= 0.6
, MonadRandom ^>= 0.6
, optparse-applicative
, optparse-applicative
, postgresql-simple >= 0.6.4 && <= 0.7.0.0
, postgresql-simple >= 0.6.4 && <= 0.7.0.0
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
View file @
2e22eac8
...
@@ -13,11 +13,11 @@ Portability : POSIX
...
@@ -13,11 +13,11 @@ Portability : POSIX
module
Gargantext.Database.Admin.Types.Hyperdata.Document
where
module
Gargantext.Database.Admin.Types.Hyperdata.Document
where
import
Gargantext.Prelude
hiding
(
ByteString
)
import
Codec.Serialise.Class
hiding
(
decode
)
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
Codec.Serialise.Class
hiding
(
decode
)
import
Gargantext.Prelude
hiding
(
ByteString
)
------------------------------------------------------------------------
------------------------------------------------------------------------
data
HyperdataDocument
=
HyperdataDocument
{
_hd_bdd
::
!
(
Maybe
Text
)
data
HyperdataDocument
=
HyperdataDocument
{
_hd_bdd
::
!
(
Maybe
Text
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment