Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
7fad2fc3
Verified
Commit
7fad2fc3
authored
Feb 28, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[sqlite] implement ngrams reading for CorpusSQLiteData
parent
625d6bbe
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
17 deletions
+41
-17
Utils.hs
src/Gargantext/API/Node/Corpus/Export/Utils.hs
+38
-17
Export.hs
test/Test/API/Export.hs
+3
-0
No files found.
src/Gargantext/API/Node/Corpus/Export/Utils.hs
View file @
7fad2fc3
...
...
@@ -19,12 +19,12 @@ import Data.ByteString.Lazy qualified as BSL
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
import
Data.Time.Clock
(
getCurrentTime
)
import
Data.Time.Clock
(
getCurrentTime
,
UTCTime
)
import
Data.Time.Format.ISO8601
(
iso8601ParseM
,
iso8601Show
)
import
Data.Version
(
parseVersion
,
showVersion
)
import
Database.SQLite.Simple
qualified
as
S
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
unNgramsTerm
)
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
)
)
import
Gargantext.API.Node.Corpus.Export.Types
(
CorpusSQLite
(
..
),
CorpusSQLiteData
(
..
))
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
,
NodeListStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
NgramsTerms
))
...
...
@@ -33,8 +33,9 @@ import Gargantext.Core.Types.Main (ListType(..))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getNgramsByContextOnlyUser
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.List
(
HyperdataList
)
import
Gargantext.Database.Admin.Types.Node
(
unNodeId
,
ContextId
(
_ContextId
),
NodeId
(
UnsafeMkNodeId
))
import
Gargantext.Database.Admin.Types.Node
(
unNodeId
,
ContextId
(
..
),
NodeId
(
UnsafeMkNodeId
))
import
Gargantext.Database.Prelude
(
DBCmd
,
IsDBCmd
)
import
Gargantext.Database.Schema.Context
(
context_id
,
context_name
,
context_date
,
context_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hash_id
,
node_hyperdata
,
node_name
,
node_parent_id
)
...
...
@@ -178,7 +179,7 @@ readCorpusSQLite :: ( CES.MonadMask m
=>
CorpusSQLite
->
m
(
Either
Text
CorpusSQLiteData
)
readCorpusSQLite
(
CorpusSQLite
{
_cs_bs
})
=
withTempSQLiteDir
$
\
(
_fp
,
_fname
,
fpath
)
->
liftBase
$
do
(
info
,
corpusData
,
listData
,
documents
)
<-
S
.
withConnection
fpath
$
\
conn
->
do
(
info
,
corpusData
,
listData
,
documents
,
ngrams
)
<-
S
.
withConnection
fpath
$
\
conn
->
do
[
S
.
Only
version
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'gargVersion'"
[
S
.
Only
cId
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'corpusId'"
[
S
.
Only
lId
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'listId'"
...
...
@@ -187,28 +188,27 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
[
corpusData
]
<-
S
.
query_
conn
"SELECT name, hash, parent_id, hyperdata FROM corpus"
[
listData
]
<-
S
.
query_
conn
"SELECT name, parent_id, hyperdata FROM lists"
-- [ngrams] <- S.query_ conn "SELECT context_id, terms, type_ FROM ngrams"
documents
<-
S
.
query_
conn
"SELECT context_id, name, date, hyperdata FROM documents"
ngrams
<-
S
.
query_
conn
"SELECT context_id, terms, type_ FROM ngrams"
pure
(
info
,
corpusData
,
listData
,
documents
)
pure
(
info
,
corpusData
,
listData
,
documents
,
ngrams
)
let
(
version
,
cId
,
lId
,
created
)
=
info
let
(
_csd_corpus_name
,
_csd_corpus_hash
,
corpusParent
,
corpusHyperdata
)
=
corpusData
let
(
_csd_list_name
,
listParent
,
listHyperdata
)
=
listData
let
parseCtx
(
ctxId
,
name
,
date
,
hd
)
=
case
(
iso8601ParseM
date
,
Aeson
.
decode
hd
)
of
(
Just
d
,
Just
h
)
->
Right
(
UnsafeMkNodeId
ctxId
,
name
,
d
,
h
)
_
->
Left
(
"Context "
<>
show
ctxId
<>
" parse error"
::
Text
)
let
(
context_errors
,
_csd_contexts
)
=
partitionEithers
(
parseCtx
<$>
documents
)
let
(
ngrams_errors
,
ngrams_
)
=
partitionEithers
(
parseNgrams
<$>
ngrams
)
-- TODO pure $ do in Either monad
case
(
readP_to_S
parseVersion
version
,
iso8601ParseM
created
,
Aeson
.
decode
corpusHyperdata
,
Aeson
.
decode
listHyperdata
,
context_errors
)
of
([(
_csd_version
,
_
)],
Just
_csd_created
,
Just
_csd_corpus_hyperdata
,
Just
_csd_list_hyperdata
,
[]
)
->
do
,
context_errors
,
ngrams_errors
)
of
([(
_csd_version
,
_
)],
Just
_csd_created
,
Just
_csd_corpus_hyperdata
,
Just
_csd_list_hyperdata
,
[]
,
[]
)
->
do
let
_csd_cId
=
UnsafeMkNodeId
cId
let
_csd_lId
=
UnsafeMkNodeId
lId
...
...
@@ -216,14 +216,35 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
let
_csd_list_parent
=
UnsafeMkNodeId
<$>
listParent
-- TODO
let
_csd_map_context_ngrams
=
Map
.
empty
let
_csd_stop_context_ngrams
=
Map
.
empty
let
_csd_candidate_context_ngrams
=
Map
.
empty
let
_csd_map_context_ngrams
=
filterNgrams
MapTerm
ngrams_
let
_csd_stop_context_ngrams
=
filterNgrams
StopTerm
ngrams_
let
_csd_candidate_context_ngrams
=
filterNgrams
CandidateTerm
ngrams_
pure
$
Right
$
CorpusSQLiteData
{
..
}
_
->
pure
$
Left
"Parse error"
where
parseCtx
::
(
Int
,
Text
,
Prelude
.
String
,
BSL
.
ByteString
)
->
Either
Text
(
NodeId
,
Text
,
UTCTime
,
HyperdataDocument
)
parseCtx
(
ctxId
,
name
,
date
,
hd
)
=
case
(
iso8601ParseM
date
,
Aeson
.
decode
hd
)
of
(
Just
d
,
Just
h
)
->
Right
(
UnsafeMkNodeId
ctxId
,
name
,
d
,
h
)
_
->
Left
(
"Context "
<>
show
ctxId
<>
" parse error"
::
Text
)
parseNgrams
::
(
Int
,
Text
,
Text
)
->
Either
Text
(
ListType
,
(
ContextId
,
NgramsTerm
))
parseNgrams
(
ctxId
,
term
,
type_
)
=
case
type_
of
"map"
->
Right
(
MapTerm
,
(
UnsafeMkContextId
ctxId
,
NgramsTerm
term
)
)
"stop"
->
Right
(
StopTerm
,
(
UnsafeMkContextId
ctxId
,
NgramsTerm
term
)
)
"candidate"
->
Right
(
CandidateTerm
,
(
UnsafeMkContextId
ctxId
,
NgramsTerm
term
)
)
_
->
Left
(
"Unknown term "
<>
term
)
filterNgrams
::
ListType
->
[(
ListType
,
(
ContextId
,
NgramsTerm
))]
->
Map
ContextId
(
Set
NgramsTerm
)
filterNgrams
lt
ngrams_
=
Map
.
fromListWith
(
<>
)
$
map
(
\
(
_
,
(
ctxId
,
term
))
->
(
ctxId
,
Set
.
singleton
term
))
(
filter
(
\
(
lt_
,
_
)
->
lt
==
lt_
)
ngrams_
)
withTempSQLiteDir
::
(
CES
.
MonadMask
m
,
MonadBase
IO
m
)
=>
((
FilePath
,
Prelude
.
String
,
FilePath
)
->
m
a
)
->
m
a
...
...
test/Test/API/Export.hs
View file @
7fad2fc3
...
...
@@ -65,6 +65,9 @@ tests = sequential $ around withTestDBAndPort $ beforeWith dbEnvSetup $ do
_csd_cId
`
shouldBe
`
corpusId
_csd_lId
`
shouldBe
`
aliceListId
length
_csd_contexts
`
shouldBe
`
2
length
_csd_map_context_ngrams
`
shouldBe
`
0
length
_csd_stop_context_ngrams
`
shouldBe
`
0
length
_csd_candidate_context_ngrams
`
shouldBe
`
0
describe
"GET /api/v1.0/corpus/cId/sqlite"
$
do
it
"returns correct SQLite db"
$
\
ctx
->
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment