Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d6c2e293
Verified
Commit
d6c2e293
authored
Feb 21, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[sqlite] first implementation of sqlite export
parent
13457ca8
Pipeline
#7332
failed with stages
in 59 minutes and 16 seconds
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
210 additions
and
58 deletions
+210
-58
gargantext.cabal
gargantext.cabal
+2
-0
Export.hs
src/Gargantext/API/Node/Corpus/Export.hs
+17
-28
Types.hs
src/Gargantext/API/Node/Corpus/Export/Types.hs
+18
-5
Utils.hs
src/Gargantext/API/Node/Corpus/Export/Utils.hs
+143
-0
Corpus.hs
src/Gargantext/API/Routes/Named/Corpus.hs
+6
-2
Private.hs
src/Gargantext/API/Server/Named/Private.hs
+23
-23
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+1
-0
No files found.
gargantext.cabal
View file @
d6c2e293
...
...
@@ -326,6 +326,7 @@ library
Gargantext.API.Node.Contact
Gargantext.API.Node.Corpus.Export
Gargantext.API.Node.Corpus.Export.Types
Gargantext.API.Node.Corpus.Export.Utils
Gargantext.API.Node.Corpus.Searx
Gargantext.API.Node.Corpus.Subcorpus
Gargantext.API.Node.Document.Export
...
...
@@ -603,6 +604,7 @@ library
, singletons ^>= 3.0.2
, singletons-th >= 3.1 && < 3.2
, smtp-mail >= 0.3.0.0
, sqlite-simple >= 0.4.19 && < 0.5
, stemmer == 0.5.2
, stm >= 2.5.1.0 && < 2.6
, stm-containers >= 1.2.0.3 && < 1.3
...
...
src/Gargantext/API/Node/Corpus/Export.hs
View file @
d6c2e293
{-# LANGUAGE TypeOperators #-}
{-|
Module : Gargantext.API.Node.Corpus.Export
Description : Corpus export
...
...
@@ -17,27 +16,22 @@ Main exports of Gargantext:
module
Gargantext.API.Node.Corpus.Export
where
import
Data.HashMap.Strict
qualified
as
HashMap
import
Control.Exception.Safe
qualified
as
CES
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
import
Data.Text
(
pack
)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
import
Gargantext.API.Ngrams.Tools
(
getRepo
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
unNgramsTerm
)
)
import
Gargantext.API.Node.Corpus.Export.Types
(
Corpus
(
..
)
)
import
Gargantext.API.Node.Corpus.Export.Types
(
Corpus
(
..
),
CorpusSQLite
(
..
)
)
import
Gargantext.API.Node.Corpus.Export.Utils
(
getContextNgrams
,
mkCorpusSQLite
)
import
Gargantext.API.Node.Document.Export.Types
qualified
as
DocumentExport
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.Core.NodeStory.Types
(
NodeListStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
)
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getNgramsByContextOnlyUser
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Database.Prelude
(
DBCmdExtra
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Context
(
_context_id
)
import
Gargantext.Prelude
hiding
(
hash
)
...
...
@@ -48,10 +42,13 @@ import qualified Gargantext.API.Routes.Named.Corpus as Named
--------------------------------------------------
-- | Hashes are ordered by Set
getCorpus
::
forall
env
err
m
.
IsGargServer
env
err
m
getCorpus
::
(
CES
.
MonadMask
m
,
IsGargServer
env
err
m
)
=>
CorpusId
->
Named
.
CorpusExportAPI
(
AsServerT
m
)
getCorpus
cId
=
Named
.
CorpusExportAPI
$
\
lId
nt'
->
get_corpus
lId
nt'
getCorpus
cId
=
Named
.
CorpusExportAPI
{
Named
.
corpusExportEp
=
get_corpus
,
Named
.
corpusSQLiteEp
=
getCorpusSQLite
cId
}
where
get_corpus
::
IsGargServer
env
err
m
...
...
@@ -89,23 +86,15 @@ getCorpus cId = Named.CorpusExportAPI $ \lId nt' -> get_corpus lId nt'
$
Corpus
{
_c_corpus
=
Map
.
elems
r
,
_c_hash
=
hash
$
List
.
map
DocumentExport
.
_d_hash
$
Map
.
elems
r
}
getContextNgrams
::
HasNodeError
err
=>
CorpusId
->
ListId
->
ListType
->
NgramsType
->
NodeListStory
->
DBCmdExtra
err
(
Map
ContextId
(
Set
NgramsTerm
))
getContextNgrams
cId
lId
listType
nt
repo
=
do
-- lId <- case lId' of
-- Nothing -> defaultList cId
-- Just l -> pure l
lIds
<-
selectNodesWithUsername
NodeList
userMaster
let
ngs
=
filterListWithRoot
[
listType
]
$
mapTermListRoot
[
lId
]
nt
repo
-- TODO HashMap
r
<-
getNgramsByContextOnlyUser
cId
(
lIds
<>
[
lId
])
nt
(
HashMap
.
keys
ngs
)
pure
r
getCorpusSQLite
::
(
CES
.
MonadMask
m
,
IsGargServer
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
CorpusSQLite
)
getCorpusSQLite
cId
lId
=
do
corpusSQLite
<-
mkCorpusSQLite
cId
lId
pure
$
addHeader
(
"attachment; filename=GarganText_corpus-"
<>
pack
(
show
cId
)
<>
".sqlite"
)
$
corpusSQLite
-- TODO
-- Exports List
...
...
src/Gargantext/API/Node/Corpus/Export/Types.hs
View file @
d6c2e293
...
...
@@ -13,14 +13,13 @@ Portability : POSIX
module
Gargantext.API.Node.Corpus.Export.Types
where
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Swagger
(
ToSchema
(
..
),
genericDeclareNamedSchema
,
ToParamSchema
(
..
)
)
import
Data.Text
(
Text
)
import
GHC.Generics
(
Generic
)
import
Data.ByteString.Lazy
qualified
as
BSL
import
Data.Swagger
(
ToSchema
(
..
),
genericDeclareNamedSchema
,
ToParamSchema
(
..
),
NamedSchema
(
..
),
binarySchema
)
import
Gargantext.API.Node.Document.Export.Types
qualified
as
DocumentExport
import
Gargantext.Core.Types
(
TODO
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Servant
import
Gargantext.Prelude
import
Servant
(
Accept
(
..
),
MimeRender
(
mimeRender
),
OctetStream
)
-- Corpus Export
...
...
@@ -37,3 +36,17 @@ instance ToSchema Corpus where
instance
ToParamSchema
Corpus
where
toParamSchema
_
=
toParamSchema
(
Proxy
::
Proxy
TODO
)
$
(
deriveJSON
(
unPrefix
"_c_"
)
''
C
orpus
)
newtype
CorpusSQLite
=
CorpusSQLite
{
_cs_bs
::
BSL
.
ByteString
}
deriving
(
Generic
)
instance
Accept
CorpusSQLite
where
contentType
_
=
contentType
(
Proxy
::
Proxy
OctetStream
)
instance
MimeRender
OctetStream
CorpusSQLite
where
mimeRender
_
(
CorpusSQLite
bs
)
=
bs
instance
ToSchema
CorpusSQLite
where
declareNamedSchema
_
=
pure
$
NamedSchema
(
Just
"CorpusSQLite"
)
binarySchema
src/Gargantext/API/Node/Corpus/Export/Utils.hs
0 → 100644
View file @
d6c2e293
{-|
Module : Gargantext.API.Node.Corpus.Export.Utils
Description : Corpus export
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeApplications #-}
module
Gargantext.API.Node.Corpus.Export.Utils
where
import
Control.Exception.Safe
qualified
as
CES
import
Data.Aeson
qualified
as
Aeson
import
Data.ByteString.Lazy
qualified
as
BSL
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
import
Data.Version
(
showVersion
)
import
Database.SQLite.Simple
qualified
as
S
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
unNgramsTerm
)
)
import
Gargantext.API.Node.Corpus.Export.Types
(
CorpusSQLite
(
..
))
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
,
NodeListStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
NgramsTerms
))
import
Gargantext.Core.Types
(
CorpusId
,
ListId
,
NodeType
(
NodeList
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getNgramsByContextOnlyUser
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.List
(
HyperdataList
)
import
Gargantext.Database.Admin.Types.Node
(
unNodeId
,
ContextId
(
_ContextId
))
import
Gargantext.Database.Prelude
(
DBCmd
,
IsDBCmd
)
import
Gargantext.Database.Schema.Context
(
context_id
,
context_name
,
context_date
,
context_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hash_id
,
node_hyperdata
,
node_name
,
node_parent_id
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
import
Gargantext.Prelude
import
Paths_gargantext
qualified
as
PG
-- cabal magic build module
import
System.Directory
(
removeDirectoryRecursive
)
import
System.IO.Temp
(
createTempDirectory
,
getCanonicalTemporaryDirectory
)
getContextNgrams
::
HasNodeError
err
=>
CorpusId
->
ListId
->
ListType
->
NgramsType
->
NodeListStory
->
DBCmd
err
(
Map
ContextId
(
Set
NgramsTerm
))
getContextNgrams
cId
lId
listType
nt
repo
=
do
-- lId <- case lId' of
-- Nothing -> defaultList cId
-- Just l -> pure l
lIds
<-
selectNodesWithUsername
NodeList
userMaster
let
ngs
=
filterListWithRoot
[
listType
]
$
mapTermListRoot
[
lId
]
nt
repo
-- TODO HashMap
r
<-
getNgramsByContextOnlyUser
cId
(
lIds
<>
[
lId
])
nt
(
HashMap
.
keys
ngs
)
pure
r
mkCorpusSQLite
::
(
CES
.
MonadMask
m
,
HasNodeStoryEnv
env
,
HasNodeError
err
,
IsDBCmd
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
m
CorpusSQLite
mkCorpusSQLite
cId
lId
=
CES
.
bracket
setup
tearDown
$
\
(
fp
,
_fname
,
fpath
)
->
do
corpus
<-
getNodeWith
cId
(
Proxy
@
HyperdataCorpus
)
listId
<-
case
lId
of
Nothing
->
defaultList
cId
Just
l
->
pure
l
l
<-
getNodeWith
listId
(
Proxy
@
HyperdataList
)
repo
<-
getRepo
[
listId
]
let
nt
=
NgramsTerms
mapNgs
<-
getContextNgrams
cId
listId
MapTerm
nt
repo
stopNgs
<-
getContextNgrams
cId
listId
StopTerm
nt
repo
candidateNgs
<-
getContextNgrams
cId
listId
CandidateTerm
nt
repo
docs
<-
selectDocNodes
cId
liftBase
$
putText
$
"[mkCorpusSQLite] listId: "
<>
show
listId
liftBase
$
putText
$
"[mkCorpusSQLite] fp: "
<>
show
fp
liftBase
$
S
.
withConnection
fpath
$
\
conn
->
do
-- better performance
-- https://kerkour.com/sqlite-for-servers
S
.
execute_
conn
"PRAGMA journal_mode = WAL"
S
.
execute_
conn
"CREATE TABLE info (key, value);"
S
.
execute
conn
"INSERT INTO info (key, value) VALUES ('gargVersion', ?)"
(
S
.
Only
$
showVersion
PG
.
version
)
S
.
execute
conn
"INSERT INTO info (key, value) VALUES ('corpusId', ?)"
(
S
.
Only
$
unNodeId
cId
)
S
.
execute
conn
"INSERT INTO info (key, value) VALUES ('listId', ?)"
(
S
.
Only
$
unNodeId
listId
)
S
.
execute_
conn
"INSERT INTO info (key, value) VALUES ('created', datetime())"
S
.
execute_
conn
"CREATE TABLE corpus (id, name, hash, parent_id, hyperdata)"
S
.
execute
conn
"INSERT INTO corpus (id, name, hash, parent_id, hyperdata) VALUES (?, ?, ?, ?, ?)"
(
unNodeId
cId
,
corpus
^.
node_name
,
corpus
^.
node_hash_id
,
unNodeId
<$>
(
corpus
^.
node_parent_id
),
Aeson
.
encode
(
corpus
^.
node_hyperdata
))
S
.
execute_
conn
"CREATE TABLE lists (id, name, parent_id, hyperdata)"
S
.
execute
conn
"INSERT INTO lists (id, name, parent_id, hyperdata) VALUES (?, ?, ?, ?)"
(
unNodeId
listId
,
l
^.
node_name
,
unNodeId
<$>
(
l
^.
node_parent_id
),
Aeson
.
encode
(
l
^.
node_hyperdata
))
S
.
execute_
conn
"CREATE TABLE ngrams (context_id, terms, type_)"
let
insertTerms
ngs
type_
=
do
let
ngs'
=
concatMap
(
\
(
ctxId
,
ngrams
)
->
(
\
n
->
(
_ContextId
ctxId
,
unNgramsTerm
n
,
type_
))
<$>
Set
.
toList
ngrams
)
(
Map
.
toList
ngs
)
S
.
executeMany
conn
"INSERT INTO ngrams (context_id, terms, type_) VALUES (?, ?, ?)"
ngs'
insertTerms
mapNgs
(
"map"
::
Text
)
insertTerms
stopNgs
(
"stop"
::
Text
)
insertTerms
candidateNgs
(
"candidate"
::
Text
)
S
.
execute_
conn
"CREATE TABLE documents (context_id, name, date, hyperdata)"
S
.
executeMany
conn
"INSERT INTO documents (context_id, name, date, hyperdata) VALUES (?, ?, ?, ?)"
((
\
c
->
(
unNodeId
(
c
^.
context_id
),
c
^.
context_name
,
c
^.
context_date
,
Aeson
.
encode
(
c
^.
context_hyperdata
)))
<$>
docs
)
bsl
<-
liftBase
$
BSL
.
readFile
fpath
pure
$
CorpusSQLite
bsl
where
setup
=
do
tmpDir
<-
liftBase
getCanonicalTemporaryDirectory
fp
<-
liftBase
$
createTempDirectory
tmpDir
"sqlite"
let
fname
=
"gargantext.sqlite"
let
fpath
=
fp
<>
"/"
<>
fname
pure
(
fp
,
fname
,
fpath
)
tearDown
(
fp
,
_fname
,
_fpath
)
=
do
liftBase
$
removeDirectoryRecursive
fp
src/Gargantext/API/Routes/Named/Corpus.hs
View file @
d6c2e293
...
...
@@ -25,7 +25,7 @@ import Data.Aeson.TH (deriveJSON)
import
Data.Swagger
(
ToSchema
(
..
),
genericDeclareNamedSchema
)
import
Data.Text
(
Text
)
import
GHC.Generics
import
Gargantext.API.Node.Corpus.Export.Types
(
Corpus
)
import
Gargantext.API.Node.Corpus.Export.Types
(
Corpus
,
CorpusSQLite
)
import
Gargantext.API.Node.Types
(
NewWithForm
,
WithQuery
)
import
Gargantext.API.Worker
(
WorkerAPI
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
...
...
@@ -35,12 +35,16 @@ import Gargantext.Prelude (Bool)
import
Servant
--------------------------------------------------
newtype
CorpusExportAPI
mode
=
CorpusExportAPI
data
CorpusExportAPI
mode
=
CorpusExportAPI
{
corpusExportEp
::
mode
:-
Summary
"Corpus Export"
:>
"export"
:>
QueryParam
"listId"
ListId
:>
QueryParam
"ngramsType"
NgramsType
:>
Get
'[
J
SON
]
(
Headers
'[
S
ervant
.
Header
"Content-Disposition"
Text
]
Corpus
)
,
corpusSQLiteEp
::
mode
:-
Summary
"Corpus SQLite export"
:>
"sqlite"
:>
QueryParam
"listId"
ListId
:>
Get
'[
O
ctetStream
]
(
Headers
'[
S
ervant
.
Header
"Content-Disposition"
Text
]
CorpusSQLite
)
}
deriving
Generic
newtype
AddWithForm
mode
=
AddWithForm
...
...
src/Gargantext/API/Server/Named/Private.hs
View file @
d6c2e293
...
...
@@ -43,27 +43,27 @@ serverPrivateGargAPI'
::
AuthenticatedUser
->
Named
.
GargPrivateAPI'
(
AsServerT
(
GargM
Env
BackendInternalError
))
serverPrivateGargAPI'
authenticatedUser
@
(
AuthenticatedUser
userNodeId
userId
)
=
Named
.
GargPrivateAPI'
{
gargAdminAPI
=
serverGargAdminAPI
,
nodeEp
=
nodeAPI
authenticatedUser
,
contextEp
=
contextAPI
(
Proxy
::
Proxy
HyperdataAny
)
authenticatedUser
,
corpusNodeAPI
=
corpusNodeAPI
authenticatedUser
,
corpusNodeNodeAPI
=
nodeNodeAPI
(
Proxy
::
Proxy
HyperdataAny
)
authenticatedUser
,
corpusExportAPI
=
CorpusExport
.
getCorpus
,
annuaireEp
=
annuaireNodeAPI
authenticatedUser
,
contactAPI
=
contactAPI
authenticatedUser
,
tableNgramsAPI
=
apiNgramsTableDoc
authenticatedUser
,
phyloExportAPI
=
PhyloExport
.
api
userNodeId
,
documentExportAPI
=
documentExportAPI
userNodeId
,
countAPI
=
Count
.
countAPI
,
graphAPI
=
Viz
.
graphAPI
authenticatedUser
userId
,
treeAPI
=
Tree
.
treeAPI
authenticatedUser
,
treeFlatAPI
=
Tree
.
treeFlatAPI
authenticatedUser
,
membersAPI
=
members
,
addWithFormAPI
=
addCorpusWithForm
(
RootId
userNodeId
)
,
addWithQueryEp
=
addCorpusWithQuery
(
RootId
userNodeId
)
,
makeSubcorpusAPI
=
Subcorpus
.
makeSubcorpus
userId
,
listGetAPI
=
List
.
getAPI
,
listJsonAPI
=
List
.
jsonAPI
,
listTsvAPI
=
List
.
tsvAPI
,
shareUrlAPI
=
shareURL
{
gargAdminAPI
=
serverGargAdminAPI
,
nodeEp
=
nodeAPI
authenticatedUser
,
contextEp
=
contextAPI
(
Proxy
::
Proxy
HyperdataAny
)
authenticatedUser
,
corpusNodeAPI
=
corpusNodeAPI
authenticatedUser
,
corpusNodeNodeAPI
=
nodeNodeAPI
(
Proxy
::
Proxy
HyperdataAny
)
authenticatedUser
,
corpusExportAPI
=
CorpusExport
.
getCorpus
,
annuaireEp
=
annuaireNodeAPI
authenticatedUser
,
contactAPI
=
contactAPI
authenticatedUser
,
tableNgramsAPI
=
apiNgramsTableDoc
authenticatedUser
,
phyloExportAPI
=
PhyloExport
.
api
userNodeId
,
documentExportAPI
=
documentExportAPI
userNodeId
,
countAPI
=
Count
.
countAPI
,
graphAPI
=
Viz
.
graphAPI
authenticatedUser
userId
,
treeAPI
=
Tree
.
treeAPI
authenticatedUser
,
treeFlatAPI
=
Tree
.
treeFlatAPI
authenticatedUser
,
membersAPI
=
members
,
addWithFormAPI
=
addCorpusWithForm
(
RootId
userNodeId
)
,
addWithQueryEp
=
addCorpusWithQuery
(
RootId
userNodeId
)
,
makeSubcorpusAPI
=
Subcorpus
.
makeSubcorpus
userId
,
listGetAPI
=
List
.
getAPI
,
listJsonAPI
=
List
.
jsonAPI
,
listTsvAPI
=
List
.
tsvAPI
,
shareUrlAPI
=
shareURL
}
src/Gargantext/Database/Query/Table/Node.hs
View file @
d6c2e293
...
...
@@ -305,6 +305,7 @@ selectNodesIdWithType nt = proc () -> do
restrict
-<
tn
.==
(
sqlInt4
$
toDBid
nt
)
returnA
-<
_node_id
row
-- | Get node, Hyperdata is 'Aeson.Value'
getNode
::
HasNodeError
err
=>
NodeId
->
DBCmd
err
(
Node
Value
)
getNode
nId
=
do
maybeNode
<-
headMay
<$>
runOpaQuery
(
selectNode
(
pgNodeId
nId
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment