Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
191
Issues
191
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
2e22eac8
Verified
Commit
2e22eac8
authored
Sep 10, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLI] db fix command, to fix hyperdata #630
Dry run mode enabled by default
parent
e3a333b8
Pipeline
#7874
passed with stages
in 49 minutes and 21 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
194 additions
and
5 deletions
+194
-5
DBFixes.hs
bin/gargantext-cli/CLI/DBFixes.hs
+177
-0
Types.hs
bin/gargantext-cli/CLI/Types.hs
+7
-0
Main.hs
bin/gargantext-cli/Main.hs
+5
-2
gargantext.cabal
gargantext.cabal
+3
-1
Document.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
+2
-2
No files found.
bin/gargantext-cli/CLI/DBFixes.hs
0 → 100644
View file @
2e22eac8
{-|
Module : CLI.DBFixes
Description : Gargantext CLI DB fixes
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Fixes to the GarganText DB
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
CLI.DBFixes
where
import
CLI.Parsers
(
settings_p
)
import
CLI.Types
import
Control.Lens
qualified
as
Lens
import
Data.Aeson
qualified
as
Aeson
import
Data.Map.Strict
qualified
as
Map
import
Data.Text
qualified
as
T
import
Database.PostgreSQL.Simple
qualified
as
PSQL
import
Database.PostgreSQL.Simple.FromField
qualified
as
PSQL
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Core.Config
(
gc_database_config
,
gc_logging
)
import
Gargantext.Core.Config.Types
(
_SettingsFile
)
import
Gargantext.Core.Config.Utils
(
readConfig
)
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
),
hd_institutes_tree
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
(
fieldLabelModifier
,
omitNothingFields
,
sumEncoding
,
SumEncoding
(
..
),
genericParseJSON
,
genericToJSON
,
defaultOptions
,
fromField'
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Database
(
runPGSExecute
,
runPGSQuery
)
import
Gargantext.System.Logging
(
LogLevel
(
..
),
logLoc
,
withLogger
)
import
Options.Applicative
fixBrokenHyperdata630Cmd
::
HasCallStack
=>
Mod
CommandFields
CLI
fixBrokenHyperdata630Cmd
=
command
"fix-broken-hyperdata-630"
(
info
(
helper
<*>
(
fmap
CLISub
$
fmap
CCMD_db_fix_630
dbFixParser
))
(
progDesc
"Fix broken hyperdata, issue #630"
))
dbFixParser
::
Parser
DBFixArgs
dbFixParser
=
DBFixArgs
<$>
settings_p
<*>
flag
True
False
(
long
"no-dry-run"
<>
help
"Whether to dry run the DB fix (enabled by default)"
)
-- | Fixes issue https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/630
-- Basically, because of Haskell HyperdataDocument type change in 2024, there remain
-- documents in `contexts` table, where the `hyperdata->institutes_tree` is a map
-- from `Text` to `Text`, instead being a map from `Text` to `[Text]` (see
-- G.D.A.T.Hyperdata.Document).
-- We make a SQL query to find hyperdata containing `Text` in value,
-- parse these rows with Haskell to make sure this is indeed broken,
-- then type-safely conver to good values and update the row.
fixBrokenHyperdata630CLI
::
DBFixArgs
->
IO
()
fixBrokenHyperdata630CLI
(
DBFixArgs
{
settings_toml
,
dry_run
})
=
do
cfg
<-
liftIO
$
readConfig
settings_toml
withLogger
(
cfg
^.
gc_logging
)
$
\
logger
->
do
$
(
logLoc
)
logger
INFO
$
"settings file: "
<>
T
.
pack
(
_SettingsFile
settings_toml
)
let
dbConfig
=
cfg
^.
gc_database_config
$
(
logLoc
)
logger
INFO
$
"DB config: "
<>
show
dbConfig
c
<-
PSQL
.
connect
dbConfig
PSQL
.
begin
c
affectedContexts
<-
runPGSQuery
c
affectedDocsQ
()
::
IO
[(
Int
,
Hyperdata630Incorrect
)]
mapM_
(
\
(
cId
,
cHyperdata
)
->
do
putText
$
"["
<>
show
cId
<>
"] "
<>
show
(
cHyperdata
^.
Lens
.
to
_hd630i_institutes_tree
)
let
fixedHyperdata
=
fix630Hyperdata
cHyperdata
putText
$
" -> "
<>
show
(
fixedHyperdata
^.
hd_institutes_tree
)
_
<-
runPGSExecute
c
updateAffectedDoc
(
Aeson
.
encode
$
fixedHyperdata
^.
hd_institutes_tree
,
cId
)
-- Check that the hyperdata can be fetched correctly
newHd
<-
runPGSQuery
c
[
sql
|
SELECT hyperdata FROM contexts WHERE id = ?
|]
(
PSQL
.
Only
cId
)
::
IO
[
PSQL
.
Only
HyperdataDocument
]
unless
(
length
newHd
==
1
)
$
do
panicTrace
$
"["
<>
show
cId
<>
"] Error! Can't get new hyperdata: "
<>
show
newHd
putText
$
"["
<>
show
cId
<>
"] updated correctly"
)
affectedContexts
if
dry_run
then
do
$
(
logLoc
)
logger
INFO
$
"dry run mode, rolling back changes"
PSQL
.
rollback
c
else
do
$
(
logLoc
)
logger
INFO
$
"no dry run mode, comitting changes"
PSQL
.
commit
c
where
-- | SQL query to find all affected documents:
affectedDocsQ
=
[
sql
|
SELECT
c.id, c.hyperdata
FROM
contexts AS c
WHERE
-- does the hyperdata contain values with strings, instead of arrays?
EXISTS (
SELECT 1
FROM jsonb_each(c.hyperdata->'institutes_tree') AS kv(key, value) -- expand the top‑level map
WHERE
c.hyperdata->'institutes_tree' IS NOT NULL
AND jsonb_typeof(kv.value) = 'string' -- a plain string found
);
|]
updateAffectedDoc
=
[
sql
|
UPDATE contexts
SET hyperdata = jsonb_set(hyperdata, '{institutes_tree}', ?::jsonb)
WHERE id = ?
|]
data
Hyperdata630Incorrect
=
Hyperdata630Incorrect
{
_hd630i_bdd
::
!
(
Maybe
Text
)
,
_hd630i_doi
::
!
(
Maybe
Text
)
,
_hd630i_url
::
!
(
Maybe
Text
)
,
_hd630i_page
::
!
(
Maybe
Int
)
,
_hd630i_title
::
!
(
Maybe
Text
)
,
_hd630i_authors
::
!
(
Maybe
Text
)
,
_hd630i_institutes
::
!
(
Maybe
Text
)
,
_hd630i_source
::
!
(
Maybe
Text
)
,
_hd630i_abstract
::
!
(
Maybe
Text
)
,
_hd630i_publication_date
::
!
(
Maybe
Text
)
,
_hd630i_publication_year
::
!
(
Maybe
Int
)
,
_hd630i_publication_month
::
!
(
Maybe
Int
)
,
_hd630i_publication_day
::
!
(
Maybe
Int
)
,
_hd630i_publication_hour
::
!
(
Maybe
Int
)
,
_hd630i_publication_minute
::
!
(
Maybe
Int
)
,
_hd630i_publication_second
::
!
(
Maybe
Int
)
,
_hd630i_language_iso2
::
!
(
Maybe
Text
)
,
_hd630i_institutes_tree
::
!
(
Maybe
(
Map
Text
Text
))
}
deriving
(
Show
,
Generic
)
instance
FromJSON
Hyperdata630Incorrect
where
parseJSON
=
genericParseJSON
(
defaultOptions
{
sumEncoding
=
ObjectWithSingleField
,
fieldLabelModifier
=
unCapitalize
.
dropPrefix
"_hd630i_"
,
omitNothingFields
=
True
}
)
instance
ToJSON
Hyperdata630Incorrect
where
toJSON
=
genericToJSON
(
defaultOptions
{
sumEncoding
=
ObjectWithSingleField
,
fieldLabelModifier
=
unCapitalize
.
dropPrefix
"_hd630i_"
,
omitNothingFields
=
True
}
)
instance
PSQL
.
FromField
Hyperdata630Incorrect
where
fromField
=
fromField'
fix630Hyperdata
::
Hyperdata630Incorrect
->
HyperdataDocument
fix630Hyperdata
Hyperdata630Incorrect
{
..
}
=
HyperdataDocument
{
_hd_bdd
=
_hd630i_bdd
,
_hd_doi
=
_hd630i_doi
,
_hd_url
=
_hd630i_url
,
_hd_page
=
_hd630i_page
,
_hd_title
=
_hd630i_title
,
_hd_authors
=
_hd630i_authors
,
_hd_institutes
=
_hd630i_institutes
,
_hd_source
=
_hd630i_source
,
_hd_abstract
=
_hd630i_abstract
,
_hd_publication_date
=
_hd630i_publication_date
,
_hd_publication_year
=
_hd630i_publication_year
,
_hd_publication_month
=
_hd630i_publication_month
,
_hd_publication_day
=
_hd630i_publication_day
,
_hd_publication_hour
=
_hd630i_publication_hour
,
_hd_publication_minute
=
_hd630i_publication_minute
,
_hd_publication_second
=
_hd630i_publication_second
,
_hd_language_iso2
=
_hd630i_language_iso2
,
_hd_institutes_tree
}
where
_hd_institutes_tree
=
Map
.
map
(
\
v
->
[
v
])
<$>
_hd630i_institutes_tree
bin/gargantext-cli/CLI/Types.hs
View file @
2e22eac8
...
@@ -129,6 +129,12 @@ data WorkerStatsArgs = WorkerStatsArgs
...
@@ -129,6 +129,12 @@ data WorkerStatsArgs = WorkerStatsArgs
{
ws_toml
::
!
SettingsFile
{
ws_toml
::
!
SettingsFile
}
deriving
(
Show
,
Eq
)
}
deriving
(
Show
,
Eq
)
-- arguments when DB fixing is needed
data
DBFixArgs
=
DBFixArgs
{
settings_toml
::
!
SettingsFile
,
dry_run
::
!
Bool
}
deriving
(
Show
,
Eq
)
data
CLICmd
data
CLICmd
=
CCMD_admin
!
AdminArgs
=
CCMD_admin
!
AdminArgs
|
CCMD_clean_csv_corpus
|
CCMD_clean_csv_corpus
...
@@ -145,6 +151,7 @@ data CLICmd
...
@@ -145,6 +151,7 @@ data CLICmd
|
CCMD_server
!
CLIServer
|
CCMD_server
!
CLIServer
|
CCMD_upgrade
!
UpgradeArgs
|
CCMD_upgrade
!
UpgradeArgs
|
CCMD_worker
!
CLIWorker
|
CCMD_worker
!
CLIWorker
|
CCMD_db_fix_630
DBFixArgs
deriving
(
Show
,
Eq
)
deriving
(
Show
,
Eq
)
data
CLI
=
data
CLI
=
...
...
bin/gargantext-cli/Main.hs
View file @
2e22eac8
...
@@ -13,12 +13,12 @@ Main specifications to index a corpus with a term list
...
@@ -13,12 +13,12 @@ Main specifications to index a corpus with a term list
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeApplications #-}
module
Main
where
module
Main
where
import
CLI.Admin
(
adminCLI
,
adminCmd
)
import
CLI.Admin
(
adminCLI
,
adminCmd
)
import
CLI.DBFixes
(
fixBrokenHyperdata630Cmd
,
fixBrokenHyperdata630CLI
)
import
CLI.FileDiff
(
fileDiffCLI
,
fileDiffCmd
)
import
CLI.FileDiff
(
fileDiffCLI
,
fileDiffCmd
)
import
CLI.FilterTermsAndCooc
import
CLI.FilterTermsAndCooc
import
CLI.Import
(
importCLI
,
importCmd
)
import
CLI.Import
(
importCLI
,
importCmd
)
...
@@ -69,6 +69,8 @@ runCLI = \case
...
@@ -69,6 +69,8 @@ runCLI = \case
->
serverCLI
args
->
serverCLI
args
CLISub
(
CCMD_worker
args
)
CLISub
(
CCMD_worker
args
)
->
workerCLI
args
->
workerCLI
args
CLISub
(
CCMD_db_fix_630
args
)
->
fixBrokenHyperdata630CLI
args
main
::
IO
()
main
::
IO
()
...
@@ -94,5 +96,6 @@ allOptions = subparser (
...
@@ -94,5 +96,6 @@ allOptions = subparser (
fileDiffCmd
<>
fileDiffCmd
<>
routesCmd
<>
routesCmd
<>
serverCmd
<>
serverCmd
<>
workerCmd
workerCmd
<>
fixBrokenHyperdata630Cmd
)
)
gargantext.cabal
View file @
2e22eac8
...
@@ -292,6 +292,7 @@ library
...
@@ -292,6 +292,7 @@ library
Gargantext.Database.Admin.Types.Hyperdata.Corpus
Gargantext.Database.Admin.Types.Hyperdata.Corpus
Gargantext.Database.Admin.Types.Hyperdata.Document
Gargantext.Database.Admin.Types.Hyperdata.Document
Gargantext.Database.Admin.Types.Hyperdata.Folder
Gargantext.Database.Admin.Types.Hyperdata.Folder
Gargantext.Database.Admin.Types.Hyperdata.Prelude
Gargantext.Database.Admin.Types.Node
Gargantext.Database.Admin.Types.Node
Gargantext.Database.Class
Gargantext.Database.Class
Gargantext.Database.Prelude
Gargantext.Database.Prelude
...
@@ -461,7 +462,6 @@ library
...
@@ -461,7 +462,6 @@ library
Gargantext.Database.Admin.Types.Hyperdata.List
Gargantext.Database.Admin.Types.Hyperdata.List
Gargantext.Database.Admin.Types.Hyperdata.Model
Gargantext.Database.Admin.Types.Hyperdata.Model
Gargantext.Database.Admin.Types.Hyperdata.Phylo
Gargantext.Database.Admin.Types.Hyperdata.Phylo
Gargantext.Database.Admin.Types.Hyperdata.Prelude
Gargantext.Database.Admin.Types.Hyperdata.Texts
Gargantext.Database.Admin.Types.Hyperdata.Texts
Gargantext.Database.Admin.Types.Hyperdata.User
Gargantext.Database.Admin.Types.Hyperdata.User
Gargantext.Database.Admin.Types.Metrics
Gargantext.Database.Admin.Types.Metrics
...
@@ -673,6 +673,7 @@ executable gargantext
...
@@ -673,6 +673,7 @@ executable gargantext
main-is: Main.hs
main-is: Main.hs
other-modules:
other-modules:
CLI.Admin
CLI.Admin
CLI.DBFixes
CLI.FileDiff
CLI.FileDiff
CLI.FilterTermsAndCooc
CLI.FilterTermsAndCooc
CLI.Import
CLI.Import
...
@@ -705,6 +706,7 @@ executable gargantext
...
@@ -705,6 +706,7 @@ executable gargantext
, gargantext
, gargantext
, gargantext-prelude
, gargantext-prelude
, haskell-bee
, haskell-bee
, lens >= 5.2.2 && < 5.3
, MonadRandom ^>= 0.6
, MonadRandom ^>= 0.6
, optparse-applicative
, optparse-applicative
, postgresql-simple >= 0.6.4 && <= 0.7.0.0
, postgresql-simple >= 0.6.4 && <= 0.7.0.0
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
View file @
2e22eac8
...
@@ -13,11 +13,11 @@ Portability : POSIX
...
@@ -13,11 +13,11 @@ Portability : POSIX
module
Gargantext.Database.Admin.Types.Hyperdata.Document
where
module
Gargantext.Database.Admin.Types.Hyperdata.Document
where
import
Gargantext.Prelude
hiding
(
ByteString
)
import
Codec.Serialise.Class
hiding
(
decode
)
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unCapitalize
,
dropPrefix
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
Codec.Serialise.Class
hiding
(
decode
)
import
Gargantext.Prelude
hiding
(
ByteString
)
------------------------------------------------------------------------
------------------------------------------------------------------------
data
HyperdataDocument
=
HyperdataDocument
{
_hd_bdd
::
!
(
Maybe
Text
)
data
HyperdataDocument
=
HyperdataDocument
{
_hd_bdd
::
!
(
Maybe
Text
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment