Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
149
Issues
149
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d5a4b1f9
Commit
d5a4b1f9
authored
Sep 30, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[frame write] implement flowDataText to insert docs into corpus
parent
852fae28
Pipeline
#1899
passed with stage
in 38 minutes and 22 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
89 additions
and
2 deletions
+89
-2
pkgs.nix
nix/pkgs.nix
+1
-0
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+57
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+2
-1
Frame.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
+11
-1
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+18
-0
No files found.
nix/pkgs.nix
View file @
d5a4b1f9
...
...
@@ -9,6 +9,7 @@ rec {
];
nonhsBuildInputs
=
with
pkgs
;
[
bzip2
docker-compose
git
gmp
gsl
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
d5a4b1f9
...
...
@@ -17,13 +17,25 @@ Portability : POSIX
module
Gargantext.API.Node.DocumentsFromWriteNodes
where
import
Control.Lens
((
^.
))
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
rights
)
import
Data.Swagger
import
qualified
Data.Text
as
T
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Flow
(
flowDataText
,
DataText
(
..
))
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getChildrenByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
import
GHC.Generics
(
Generic
)
import
Servant
...
...
@@ -71,9 +83,54 @@ documentsFromWriteNodes uId nId p logStatus = do
_
<-
printDebug
"[documentsFromWriteNodes] inside job, nId"
nId
_
<-
printDebug
"[documentsFromWriteNodes] inside job, p"
p
frameWriteIds
<-
getChildrenByType
nId
NodeFrameWrite
_
<-
printDebug
"[documentsFromWriteNodes] children"
frameWriteIds
-- https://write.frame.gargantext.org/<frame_id>/download
frameWrites
<-
mapM
(
\
id
->
getNodeWith
id
(
Proxy
::
Proxy
HyperdataFrame
))
frameWriteIds
frameWritesWithContents
<-
liftBase
$
mapM
(
\
node
->
do
contents
<-
getHyperdataFrameContents
(
node
^.
node_hyperdata
)
pure
(
node
,
contents
)
)
frameWrites
_
<-
printDebug
"[documentsFromWriteNodes] frameWritesWithContents"
frameWritesWithContents
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsed
=
rights
parsedE
_
<-
printDebug
"[documentsFromWriteNodes] parsed"
parsed
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[
parsed
])
(
Multi
EN
)
nId
Nothing
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
------------------------------------------------------------------------
hyperdataDocumentFromFrameWrite
::
(
HyperdataFrame
,
T
.
Text
)
->
Either
T
.
Text
HyperdataDocument
hyperdataDocumentFromFrameWrite
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
},
contents
)
=
case
parseLines
contents
of
Left
_
->
Left
"Error parsing node"
Right
(
Parsed
{
authors
,
contents
=
c
,
date
,
source
,
title
=
t
})
->
let
authorJoinSingle
(
Author
{
firstName
,
lastName
})
=
T
.
concat
[
lastName
,
", "
,
firstName
]
in
let
authors'
=
T
.
concat
$
authorJoinSingle
<$>
authors
in
Right
HyperdataDocument
{
_hd_bdd
=
Just
"FrameWrite"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
t
,
_hd_authors
=
Just
authors'
,
_hd_institutes
=
Nothing
,
_hd_source
=
source
,
_hd_abstract
=
Just
c
,
_hd_publication_date
=
date
,
_hd_publication_year
=
Nothing
-- TODO
,
_hd_publication_month
=
Nothing
-- TODO
,
_hd_publication_day
=
Nothing
-- TODO
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
EN
}
src/Gargantext/Database/Action/Flow.hs
View file @
d5a4b1f9
...
...
@@ -24,7 +24,8 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Action.Flow
-- (flowDatabase, ngrams2list)
(
getDataText
(
DataText
(
..
)
,
getDataText
,
flowDataText
,
flow
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
View file @
d5a4b1f9
...
...
@@ -21,15 +21,20 @@ Portability : POSIX
module
Gargantext.Database.Admin.Types.Hyperdata.Frame
where
import
Control.Lens
import
Data.ByteString.Lazy
(
toStrict
)
import
qualified
Data.Text
as
T
import
Data.Text.Encoding
(
decodeUtf8
)
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
qualified
Network.Wreq
as
Wreq
------------------------------------------------------------------------
data
HyperdataFrame
=
HyperdataFrame
{
_hf_base
::
!
Text
,
_hf_frame_id
::
!
Text
}
deriving
(
Generic
)
deriving
(
Generic
,
Show
)
defaultHyperdataFrame
::
HyperdataFrame
...
...
@@ -63,3 +68,8 @@ instance ToSchema HyperdataFrame where
&
mapped
.
schema
.
description
?~
"Frame Hyperdata"
&
mapped
.
schema
.
example
?~
toJSON
defaultHyperdataFrame
getHyperdataFrameContents
::
HyperdataFrame
->
IO
Text
getHyperdataFrameContents
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
})
=
do
let
path
=
T
.
concat
[
_hf_base
,
"/"
,
_hf_frame_id
,
"/download"
]
r
<-
Wreq
.
get
$
T
.
unpack
path
pure
$
decodeUtf8
$
toStrict
$
r
^.
Wreq
.
responseBody
src/Gargantext/Database/Query/Table/Node.hs
View file @
d5a4b1f9
...
...
@@ -136,6 +136,24 @@ getClosestParentIdByType nId nType = do
WHERE n1.id = ? AND 0 = ?;
|]
-- | Given a node id, find all it's children (no matter how deep) of
-- given node type.
getChildrenByType
::
HasDBid
NodeType
=>
NodeId
->
NodeType
->
Cmd
err
[
NodeId
]
getChildrenByType
nId
nType
=
do
result
<-
runPGSQuery
query
(
nId
,
0
::
Int
)
children_lst
<-
mapM
(
\
(
id
,
_
)
->
getChildrenByType
id
nType
)
result
pure
$
concat
$
[
fst
<$>
filter
(
\
(
_
,
pTypename
)
->
pTypename
==
toDBid
nType
)
result
]
++
children_lst
where
query
::
DPS
.
Query
query
=
[
sql
|
SELECT n.id, n.typename
FROM nodes n
WHERE n.parent_id = ? AND 0 = ?;
|]
------------------------------------------------------------------------
getDocumentsV3WithParentId
::
HasDBid
NodeType
=>
NodeId
->
Cmd
err
[
Node
HyperdataDocumentV3
]
getDocumentsV3WithParentId
n
=
runOpaQuery
$
selectNodesWith'
n
(
Just
NodeDocument
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment