Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8667dfeb
Commit
8667dfeb
authored
Aug 02, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[frameUpload] implement CSV v3 upload for corpus
parent
6549732e
Pipeline
#1695
passed with stage
in 35 minutes and 14 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
84 additions
and
55 deletions
+84
-55
package.yaml
package.yaml
+1
-0
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+14
-0
Export.hs
src/Gargantext/API/Node/Corpus/Export.hs
+5
-4
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+9
-18
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+35
-17
Routes.hs
src/Gargantext/API/Routes.hs
+16
-13
Prelude.hs
src/Gargantext/Database/Prelude.hs
+2
-1
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+2
-2
No files found.
package.yaml
View file @
8667dfeb
...
...
@@ -224,6 +224,7 @@ library:
-
transformers
-
transformers-base
-
unordered-containers
-
utf8-string
-
uuid
-
validity
-
vector
...
...
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
8667dfeb
...
...
@@ -101,6 +101,8 @@ data JobLog = JobLog
}
deriving
(
Show
,
Generic
)
makeLenses
''
J
obLog
instance
Arbitrary
JobLog
where
arbitrary
=
JobLog
<$>
arbitrary
...
...
@@ -129,6 +131,18 @@ type ScrapersEnv = JobEnv JobLog JobLog
type
ScraperAPI
=
AsyncJobsAPI
JobLog
ScraperInput
JobLog
jobLogInit
::
Int
->
JobLog
jobLogInit
n
=
JobLog
{
_scst_succeeded
=
Just
n
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
jobLogSucc
::
JobLog
->
JobLog
jobLogSucc
jl
=
over
(
scst_succeeded
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
c
->
c
-
1
)
jl
jobLogErr
::
JobLog
->
JobLog
jobLogErr
jl
=
over
(
scst_failed
.
_Just
)
(
+
1
)
$
over
(
scst_remaining
.
_Just
)
(
\
c
->
c
-
1
)
jl
------------------------------------------------------------------------
type
AsyncJobs
event
ctI
input
output
=
AsyncJobsAPI'
'U
n
safe
'S
a
fe
ctI
'[
J
SON
]
Maybe
event
input
output
src/Gargantext/API/Node/Corpus/Export.hs
View file @
8667dfeb
...
...
@@ -19,6 +19,11 @@ module Gargantext.API.Node.Corpus.Export
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Set
(
Set
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.HashMap.Strict
as
HashMap
import
Gargantext.API.Node.Corpus.Export.Types
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
mapTermListRoot
,
getRepo
)
...
...
@@ -36,10 +41,6 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
(
_node_id
,
_node_hyperdata
)
import
Gargantext.Prelude
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.HashMap.Strict
as
HashMap
--------------------------------------------------
-- | Hashes are ordered by Set
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
8667dfeb
...
...
@@ -35,7 +35,7 @@ import Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
jobLogSucc
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
...
...
@@ -234,15 +234,12 @@ addToCorpusWithForm :: FlowCmdM env err m
->
CorpusId
->
NewWithForm
->
(
JobLog
->
m
()
)
->
JobLog
->
m
JobLog
addToCorpusWithForm
user
cid
(
NewWithForm
ft
d
l
_n
)
logStatus
=
do
addToCorpusWithForm
user
cid
(
NewWithForm
ft
d
l
_n
)
logStatus
jobLog
=
do
printDebug
"[addToCorpusWithForm] Parsing corpus: "
cid
printDebug
"[addToCorpusWithForm] fileType"
ft
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
2
,
_scst_events
=
Just
[]
}
logStatus
jobLog
let
parse
=
case
ft
of
CSV_HAL
->
Parser
.
parseFormat
Parser
.
CsvHal
...
...
@@ -256,12 +253,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
<$>
parse
(
cs
d
)
printDebug
"Parsing corpus finished : "
cid
logStatus
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
logStatus
jobLog2
printDebug
"Starting extraction : "
cid
-- TODO granularity of the logStatus
...
...
@@ -274,11 +266,10 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus = do
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
pure
jobLog3
where
jobLog2
=
jobLogSucc
jobLog
jobLog3
=
jobLogSucc
jobLog2
{-
addToCorpusWithFile :: FlowCmdM env err m
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
8667dfeb
...
...
@@ -6,18 +6,27 @@ module Gargantext.API.Node.FrameCalcUpload where
import
Control.Lens
((
^.
))
import
Data.Aeson
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.ByteString.UTF8
as
BSU8
import
Data.Swagger
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
(
newManager
,
httpLbs
,
parseRequest
,
responseBody
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Servant
import
Servant.Job.Async
import
Web.FormUrlEncoded
(
FromForm
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
jobLogInit
,
jobLogSucc
,
jobLogErr
)
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Types
(
NewWithForm
(
..
))
import
Gargantext.API.Prelude
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
get
ClosestParentIdByType
,
get
NodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
...
...
@@ -38,7 +47,9 @@ type FrameCalcUploadAPI = Summary " FrameCalc upload"
frameCalcUploadAPI
::
UserId
->
NodeId
->
GargServer
FrameCalcUploadAPI
frameCalcUploadAPI
uId
nId
=
serveJobsAPI
$
JobFunction
(
\
p
logs
->
frameCalcUploadAsync
uId
nId
p
(
liftBase
.
logs
))
JobFunction
(
\
p
logs
->
frameCalcUploadAsync
uId
nId
p
(
liftBase
.
logs
)
(
jobLogInit
5
)
)
frameCalcUploadAsync
::
FlowCmdM
env
err
m
...
...
@@ -46,26 +57,33 @@ frameCalcUploadAsync :: FlowCmdM env err m
->
NodeId
->
FrameCalcUpload
->
(
JobLog
->
m
()
)
->
JobLog
->
m
JobLog
frameCalcUploadAsync
uId
nId
_f
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
frameCalcUploadAsync
uId
nId
_f
logStatus
jobLog
=
do
logStatus
jobLog
printDebug
"[frameCalcUploadAsync] uId"
uId
printDebug
"[frameCalcUploadAsync] nId"
nId
--
printDebug "[frameCalcUploadAsync] uId" uId
--
printDebug "[frameCalcUploadAsync] nId" nId
node
<-
getNodeWith
nId
(
Proxy
::
Proxy
HyperdataFrame
)
let
(
HyperdataFrame
{
_hf_base
=
base
,
_hf_frame_id
=
frame_id
})
=
node
^.
node_hyperdata
let
csvUrl
=
base
<>
"/"
<>
frame_id
<>
".csv"
printDebug
"[frameCalcUploadAsync] csvUrl"
csvUrl
--
printDebug "[frameCalcUploadAsync] csvUrl" csvUrl
pure
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
res
<-
liftBase
$
do
manager
<-
newManager
tlsManagerSettings
req
<-
parseRequest
$
T
.
unpack
csvUrl
httpLbs
req
manager
let
body
=
T
.
pack
$
BSU8
.
toString
$
BSL
.
toStrict
$
responseBody
res
mCId
<-
getClosestParentIdByType
nId
NodeCorpus
-- printDebug "[frameCalcUploadAsync] mCId" mCId
jobLog2
<-
case
mCId
of
Nothing
->
pure
$
jobLogErr
jobLog
Just
cId
->
addToCorpusWithForm
(
RootId
(
NodeId
uId
))
cId
(
NewWithForm
CSV
body
Nothing
"calc-upload.csv"
)
logStatus
jobLog
pure
$
jobLogSucc
jobLog2
src/Gargantext/API/Routes.hs
View file @
8667dfeb
...
...
@@ -27,9 +27,23 @@ import Control.Concurrent (threadDelay)
import
Control.Lens
(
view
)
import
Data.Text
(
Text
)
import
Data.Validity
import
Servant
import
Servant.Auth
as
SA
import
Servant.Auth.Swagger
()
import
Servant.Job.Async
import
Servant.Swagger.UI
import
qualified
Gargantext.API.Ngrams.List
as
List
import
qualified
Gargantext.API.Node.Contact
as
Contact
import
qualified
Gargantext.API.Node.Corpus.Annuaire
as
Annuaire
import
qualified
Gargantext.API.Node.Corpus.Export
as
Export
import
qualified
Gargantext.API.Node.Corpus.Export.Types
as
Export
import
qualified
Gargantext.API.Node.Corpus.New
as
New
import
qualified
Gargantext.API.Public
as
Public
import
Gargantext.API.Admin.Auth.Types
(
AuthRequest
,
AuthResponse
,
AuthenticatedUser
(
..
),
PathId
(
..
))
import
Gargantext.API.Admin.Auth
(
withAccess
)
import
Gargantext.API.Admin.FrontEnd
(
FrontEndAPI
)
import
Gargantext.API.Admin.Orchestrator.Types
(
jobLogInit
)
import
Gargantext.API.Count
(
CountAPI
,
count
,
Query
)
import
Gargantext.API.Ngrams
(
TableNgramsApi
,
apiNgramsTableDoc
)
import
Gargantext.API.Node
...
...
@@ -41,18 +55,7 @@ import Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
gc_max_docs_scrapers
)
import
Servant
import
Servant.Auth
as
SA
import
Servant.Auth.Swagger
()
import
Servant.Job.Async
import
Servant.Swagger.UI
import
qualified
Gargantext.API.Ngrams.List
as
List
import
qualified
Gargantext.API.Node.Contact
as
Contact
import
qualified
Gargantext.API.Node.Corpus.Annuaire
as
Annuaire
import
qualified
Gargantext.API.Node.Corpus.Export
as
Export
import
qualified
Gargantext.API.Node.Corpus.Export.Types
as
Export
import
qualified
Gargantext.API.Node.Corpus.New
as
New
import
qualified
Gargantext.API.Public
as
Public
type
GargAPI
=
"api"
:>
Summary
"API "
:>
GargAPIVersion
-- | TODO :<|> Summary "Latest API" :> GargAPI'
...
...
@@ -284,7 +287,7 @@ addCorpusWithForm user cid =
log''
x
=
do
printDebug
"addToCorpusWithForm"
x
liftBase
$
log'
x
in
New
.
addToCorpusWithForm
user
cid
i
log''
)
in
New
.
addToCorpusWithForm
user
cid
i
log''
(
jobLogInit
3
)
)
addCorpusWithFile
::
User
->
GargServer
New
.
AddWithFile
addCorpusWithFile
user
cid
=
...
...
src/Gargantext/Database/Prelude.hs
View file @
8667dfeb
...
...
@@ -31,7 +31,6 @@ import Database.PostgreSQL.Simple (Connection, connect)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
),
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.Internal
(
Field
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
())
import
Opaleye
(
Query
,
Unpackspec
,
showSqlForPostgres
,
FromFields
,
Select
,
runQuery
,
PGJsonb
,
QueryRunnerColumnDefault
)
import
Opaleye.Aggregate
(
countRows
)
import
System.IO
(
FilePath
)
...
...
@@ -41,6 +40,8 @@ import qualified Data.ByteString as DB
import
qualified
Data.List
as
DL
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
Gargantext.Prelude.Config
(
GargConfig
())
-------------------------------------------------------
class
HasConnectionPool
env
where
connPool
::
Getter
env
(
Pool
Connection
)
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
8667dfeb
...
...
@@ -121,7 +121,7 @@ getClosestParentIdByType :: HasDBid NodeType
getClosestParentIdByType
nId
nType
=
do
result
<-
runPGSQuery
query
(
nId
,
0
::
Int
)
case
result
of
[
DPS
.
Only
parentId
,
DPS
.
Only
pTypename
]
->
do
[
(
NodeId
parentId
,
pTypename
)
]
->
do
if
toDBid
nType
==
pTypename
then
pure
$
Just
$
NodeId
parentId
else
...
...
@@ -131,7 +131,7 @@ getClosestParentIdByType nId nType = do
query
::
DPS
.
Query
query
=
[
sql
|
SELECT n2.id, n2.typename
FROM nodes n1
FROM nodes n1
JOIN nodes n2 ON n1.parent_id = n2.id
WHERE n1.id = ? AND 0 = ?;
|]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment