Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
7fad2fc3
Verified
Commit
7fad2fc3
authored
Feb 28, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[sqlite] implement ngrams reading for CorpusSQLiteData
parent
625d6bbe
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
17 deletions
+41
-17
Utils.hs
src/Gargantext/API/Node/Corpus/Export/Utils.hs
+38
-17
Export.hs
test/Test/API/Export.hs
+3
-0
No files found.
src/Gargantext/API/Node/Corpus/Export/Utils.hs
View file @
7fad2fc3
...
@@ -19,12 +19,12 @@ import Data.ByteString.Lazy qualified as BSL
...
@@ -19,12 +19,12 @@ import Data.ByteString.Lazy qualified as BSL
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
qualified
as
Map
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
import
Data.Set
qualified
as
Set
import
Data.Time.Clock
(
getCurrentTime
)
import
Data.Time.Clock
(
getCurrentTime
,
UTCTime
)
import
Data.Time.Format.ISO8601
(
iso8601ParseM
,
iso8601Show
)
import
Data.Time.Format.ISO8601
(
iso8601ParseM
,
iso8601Show
)
import
Data.Version
(
parseVersion
,
showVersion
)
import
Data.Version
(
parseVersion
,
showVersion
)
import
Database.SQLite.Simple
qualified
as
S
import
Database.SQLite.Simple
qualified
as
S
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
unNgramsTerm
)
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
)
)
import
Gargantext.API.Node.Corpus.Export.Types
(
CorpusSQLite
(
..
),
CorpusSQLiteData
(
..
))
import
Gargantext.API.Node.Corpus.Export.Types
(
CorpusSQLite
(
..
),
CorpusSQLiteData
(
..
))
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
,
NodeListStory
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
,
NodeListStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
NgramsTerms
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
NgramsTerms
))
...
@@ -33,8 +33,9 @@ import Gargantext.Core.Types.Main (ListType(..))
...
@@ -33,8 +33,9 @@ import Gargantext.Core.Types.Main (ListType(..))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getNgramsByContextOnlyUser
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getNgramsByContextOnlyUser
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.List
(
HyperdataList
)
import
Gargantext.Database.Admin.Types.Hyperdata.List
(
HyperdataList
)
import
Gargantext.Database.Admin.Types.Node
(
unNodeId
,
ContextId
(
_ContextId
),
NodeId
(
UnsafeMkNodeId
))
import
Gargantext.Database.Admin.Types.Node
(
unNodeId
,
ContextId
(
..
),
NodeId
(
UnsafeMkNodeId
))
import
Gargantext.Database.Prelude
(
DBCmd
,
IsDBCmd
)
import
Gargantext.Database.Prelude
(
DBCmd
,
IsDBCmd
)
import
Gargantext.Database.Schema.Context
(
context_id
,
context_name
,
context_date
,
context_hyperdata
)
import
Gargantext.Database.Schema.Context
(
context_id
,
context_name
,
context_date
,
context_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hash_id
,
node_hyperdata
,
node_name
,
node_parent_id
)
import
Gargantext.Database.Schema.Node
(
node_hash_id
,
node_hyperdata
,
node_name
,
node_parent_id
)
...
@@ -178,7 +179,7 @@ readCorpusSQLite :: ( CES.MonadMask m
...
@@ -178,7 +179,7 @@ readCorpusSQLite :: ( CES.MonadMask m
=>
CorpusSQLite
=>
CorpusSQLite
->
m
(
Either
Text
CorpusSQLiteData
)
->
m
(
Either
Text
CorpusSQLiteData
)
readCorpusSQLite
(
CorpusSQLite
{
_cs_bs
})
=
withTempSQLiteDir
$
\
(
_fp
,
_fname
,
fpath
)
->
liftBase
$
do
readCorpusSQLite
(
CorpusSQLite
{
_cs_bs
})
=
withTempSQLiteDir
$
\
(
_fp
,
_fname
,
fpath
)
->
liftBase
$
do
(
info
,
corpusData
,
listData
,
documents
)
<-
S
.
withConnection
fpath
$
\
conn
->
do
(
info
,
corpusData
,
listData
,
documents
,
ngrams
)
<-
S
.
withConnection
fpath
$
\
conn
->
do
[
S
.
Only
version
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'gargVersion'"
[
S
.
Only
version
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'gargVersion'"
[
S
.
Only
cId
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'corpusId'"
[
S
.
Only
cId
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'corpusId'"
[
S
.
Only
lId
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'listId'"
[
S
.
Only
lId
]
<-
S
.
query_
conn
"SELECT value FROM info WHERE key = 'listId'"
...
@@ -187,28 +188,27 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
...
@@ -187,28 +188,27 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
[
corpusData
]
<-
S
.
query_
conn
"SELECT name, hash, parent_id, hyperdata FROM corpus"
[
corpusData
]
<-
S
.
query_
conn
"SELECT name, hash, parent_id, hyperdata FROM corpus"
[
listData
]
<-
S
.
query_
conn
"SELECT name, parent_id, hyperdata FROM lists"
[
listData
]
<-
S
.
query_
conn
"SELECT name, parent_id, hyperdata FROM lists"
-- [ngrams] <- S.query_ conn "SELECT context_id, terms, type_ FROM ngrams"
documents
<-
S
.
query_
conn
"SELECT context_id, name, date, hyperdata FROM documents"
documents
<-
S
.
query_
conn
"SELECT context_id, name, date, hyperdata FROM documents"
ngrams
<-
S
.
query_
conn
"SELECT context_id, terms, type_ FROM ngrams"
pure
(
info
,
corpusData
,
listData
,
documents
)
pure
(
info
,
corpusData
,
listData
,
documents
,
ngrams
)
let
(
version
,
cId
,
lId
,
created
)
=
info
let
(
version
,
cId
,
lId
,
created
)
=
info
let
(
_csd_corpus_name
,
_csd_corpus_hash
,
corpusParent
,
corpusHyperdata
)
=
corpusData
let
(
_csd_corpus_name
,
_csd_corpus_hash
,
corpusParent
,
corpusHyperdata
)
=
corpusData
let
(
_csd_list_name
,
listParent
,
listHyperdata
)
=
listData
let
(
_csd_list_name
,
listParent
,
listHyperdata
)
=
listData
let
parseCtx
(
ctxId
,
name
,
date
,
hd
)
=
case
(
iso8601ParseM
date
,
Aeson
.
decode
hd
)
of
(
Just
d
,
Just
h
)
->
Right
(
UnsafeMkNodeId
ctxId
,
name
,
d
,
h
)
_
->
Left
(
"Context "
<>
show
ctxId
<>
" parse error"
::
Text
)
let
(
context_errors
,
_csd_contexts
)
=
partitionEithers
(
parseCtx
<$>
documents
)
let
(
context_errors
,
_csd_contexts
)
=
partitionEithers
(
parseCtx
<$>
documents
)
let
(
ngrams_errors
,
ngrams_
)
=
partitionEithers
(
parseNgrams
<$>
ngrams
)
-- TODO pure $ do in Either monad
case
(
readP_to_S
parseVersion
version
case
(
readP_to_S
parseVersion
version
,
iso8601ParseM
created
,
iso8601ParseM
created
,
Aeson
.
decode
corpusHyperdata
,
Aeson
.
decode
corpusHyperdata
,
Aeson
.
decode
listHyperdata
,
Aeson
.
decode
listHyperdata
,
context_errors
)
of
,
context_errors
([(
_csd_version
,
_
)],
Just
_csd_created
,
Just
_csd_corpus_hyperdata
,
Just
_csd_list_hyperdata
,
[]
)
->
do
,
ngrams_errors
)
of
([(
_csd_version
,
_
)],
Just
_csd_created
,
Just
_csd_corpus_hyperdata
,
Just
_csd_list_hyperdata
,
[]
,
[]
)
->
do
let
_csd_cId
=
UnsafeMkNodeId
cId
let
_csd_cId
=
UnsafeMkNodeId
cId
let
_csd_lId
=
UnsafeMkNodeId
lId
let
_csd_lId
=
UnsafeMkNodeId
lId
...
@@ -216,14 +216,35 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
...
@@ -216,14 +216,35 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
let
_csd_list_parent
=
UnsafeMkNodeId
<$>
listParent
let
_csd_list_parent
=
UnsafeMkNodeId
<$>
listParent
-- TODO
let
_csd_map_context_ngrams
=
filterNgrams
MapTerm
ngrams_
let
_csd_map_context_ngrams
=
Map
.
empty
let
_csd_stop_context_ngrams
=
filterNgrams
StopTerm
ngrams_
let
_csd_stop_context_ngrams
=
Map
.
empty
let
_csd_candidate_context_ngrams
=
filterNgrams
CandidateTerm
ngrams_
let
_csd_candidate_context_ngrams
=
Map
.
empty
pure
$
Right
$
CorpusSQLiteData
{
..
}
pure
$
Right
$
CorpusSQLiteData
{
..
}
_
->
pure
$
Left
"Parse error"
_
->
pure
$
Left
"Parse error"
where
parseCtx
::
(
Int
,
Text
,
Prelude
.
String
,
BSL
.
ByteString
)
->
Either
Text
(
NodeId
,
Text
,
UTCTime
,
HyperdataDocument
)
parseCtx
(
ctxId
,
name
,
date
,
hd
)
=
case
(
iso8601ParseM
date
,
Aeson
.
decode
hd
)
of
(
Just
d
,
Just
h
)
->
Right
(
UnsafeMkNodeId
ctxId
,
name
,
d
,
h
)
_
->
Left
(
"Context "
<>
show
ctxId
<>
" parse error"
::
Text
)
parseNgrams
::
(
Int
,
Text
,
Text
)
->
Either
Text
(
ListType
,
(
ContextId
,
NgramsTerm
))
parseNgrams
(
ctxId
,
term
,
type_
)
=
case
type_
of
"map"
->
Right
(
MapTerm
,
(
UnsafeMkContextId
ctxId
,
NgramsTerm
term
)
)
"stop"
->
Right
(
StopTerm
,
(
UnsafeMkContextId
ctxId
,
NgramsTerm
term
)
)
"candidate"
->
Right
(
CandidateTerm
,
(
UnsafeMkContextId
ctxId
,
NgramsTerm
term
)
)
_
->
Left
(
"Unknown term "
<>
term
)
filterNgrams
::
ListType
->
[(
ListType
,
(
ContextId
,
NgramsTerm
))]
->
Map
ContextId
(
Set
NgramsTerm
)
filterNgrams
lt
ngrams_
=
Map
.
fromListWith
(
<>
)
$
map
(
\
(
_
,
(
ctxId
,
term
))
->
(
ctxId
,
Set
.
singleton
term
))
(
filter
(
\
(
lt_
,
_
)
->
lt
==
lt_
)
ngrams_
)
withTempSQLiteDir
::
(
CES
.
MonadMask
m
,
MonadBase
IO
m
)
withTempSQLiteDir
::
(
CES
.
MonadMask
m
,
MonadBase
IO
m
)
=>
((
FilePath
,
Prelude
.
String
,
FilePath
)
->
m
a
)
=>
((
FilePath
,
Prelude
.
String
,
FilePath
)
->
m
a
)
->
m
a
->
m
a
...
...
test/Test/API/Export.hs
View file @
7fad2fc3
...
@@ -65,6 +65,9 @@ tests = sequential $ around withTestDBAndPort $ beforeWith dbEnvSetup $ do
...
@@ -65,6 +65,9 @@ tests = sequential $ around withTestDBAndPort $ beforeWith dbEnvSetup $ do
_csd_cId
`
shouldBe
`
corpusId
_csd_cId
`
shouldBe
`
corpusId
_csd_lId
`
shouldBe
`
aliceListId
_csd_lId
`
shouldBe
`
aliceListId
length
_csd_contexts
`
shouldBe
`
2
length
_csd_contexts
`
shouldBe
`
2
length
_csd_map_context_ngrams
`
shouldBe
`
0
length
_csd_stop_context_ngrams
`
shouldBe
`
0
length
_csd_candidate_context_ngrams
`
shouldBe
`
0
describe
"GET /api/v1.0/corpus/cId/sqlite"
$
do
describe
"GET /api/v1.0/corpus/cId/sqlite"
$
do
it
"returns correct SQLite db"
$
\
ctx
->
do
it
"returns correct SQLite db"
$
\
ctx
->
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment