Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d5a4b1f9
Commit
d5a4b1f9
authored
Sep 30, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[frame write] implement flowDataText to insert docs into corpus
parent
852fae28
Pipeline
#1899
passed with stage
in 38 minutes and 22 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
89 additions
and
2 deletions
+89
-2
pkgs.nix
nix/pkgs.nix
+1
-0
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+57
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+2
-1
Frame.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
+11
-1
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+18
-0
No files found.
nix/pkgs.nix
View file @
d5a4b1f9
...
...
@@ -9,6 +9,7 @@ rec {
];
nonhsBuildInputs
=
with
pkgs
;
[
bzip2
docker-compose
git
gmp
gsl
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
d5a4b1f9
...
...
@@ -17,13 +17,25 @@ Portability : POSIX
module
Gargantext.API.Node.DocumentsFromWriteNodes
where
import
Control.Lens
((
^.
))
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
rights
)
import
Data.Swagger
import
qualified
Data.Text
as
T
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Flow
(
flowDataText
,
DataText
(
..
))
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getChildrenByType
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
import
GHC.Generics
(
Generic
)
import
Servant
...
...
@@ -71,9 +83,54 @@ documentsFromWriteNodes uId nId p logStatus = do
_
<-
printDebug
"[documentsFromWriteNodes] inside job, nId"
nId
_
<-
printDebug
"[documentsFromWriteNodes] inside job, p"
p
frameWriteIds
<-
getChildrenByType
nId
NodeFrameWrite
_
<-
printDebug
"[documentsFromWriteNodes] children"
frameWriteIds
-- https://write.frame.gargantext.org/<frame_id>/download
frameWrites
<-
mapM
(
\
id
->
getNodeWith
id
(
Proxy
::
Proxy
HyperdataFrame
))
frameWriteIds
frameWritesWithContents
<-
liftBase
$
mapM
(
\
node
->
do
contents
<-
getHyperdataFrameContents
(
node
^.
node_hyperdata
)
pure
(
node
,
contents
)
)
frameWrites
_
<-
printDebug
"[documentsFromWriteNodes] frameWritesWithContents"
frameWritesWithContents
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsed
=
rights
parsedE
_
<-
printDebug
"[documentsFromWriteNodes] parsed"
parsed
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[
parsed
])
(
Multi
EN
)
nId
Nothing
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
------------------------------------------------------------------------
hyperdataDocumentFromFrameWrite
::
(
HyperdataFrame
,
T
.
Text
)
->
Either
T
.
Text
HyperdataDocument
hyperdataDocumentFromFrameWrite
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
},
contents
)
=
case
parseLines
contents
of
Left
_
->
Left
"Error parsing node"
Right
(
Parsed
{
authors
,
contents
=
c
,
date
,
source
,
title
=
t
})
->
let
authorJoinSingle
(
Author
{
firstName
,
lastName
})
=
T
.
concat
[
lastName
,
", "
,
firstName
]
in
let
authors'
=
T
.
concat
$
authorJoinSingle
<$>
authors
in
Right
HyperdataDocument
{
_hd_bdd
=
Just
"FrameWrite"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
t
,
_hd_authors
=
Just
authors'
,
_hd_institutes
=
Nothing
,
_hd_source
=
source
,
_hd_abstract
=
Just
c
,
_hd_publication_date
=
date
,
_hd_publication_year
=
Nothing
-- TODO
,
_hd_publication_month
=
Nothing
-- TODO
,
_hd_publication_day
=
Nothing
-- TODO
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
EN
}
src/Gargantext/Database/Action/Flow.hs
View file @
d5a4b1f9
...
...
@@ -24,7 +24,8 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Action.Flow
-- (flowDatabase, ngrams2list)
(
getDataText
(
DataText
(
..
)
,
getDataText
,
flowDataText
,
flow
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
View file @
d5a4b1f9
...
...
@@ -21,15 +21,20 @@ Portability : POSIX
module
Gargantext.Database.Admin.Types.Hyperdata.Frame
where
import
Control.Lens
import
Data.ByteString.Lazy
(
toStrict
)
import
qualified
Data.Text
as
T
import
Data.Text.Encoding
(
decodeUtf8
)
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
qualified
Network.Wreq
as
Wreq
------------------------------------------------------------------------
data
HyperdataFrame
=
HyperdataFrame
{
_hf_base
::
!
Text
,
_hf_frame_id
::
!
Text
}
deriving
(
Generic
)
deriving
(
Generic
,
Show
)
defaultHyperdataFrame
::
HyperdataFrame
...
...
@@ -63,3 +68,8 @@ instance ToSchema HyperdataFrame where
&
mapped
.
schema
.
description
?~
"Frame Hyperdata"
&
mapped
.
schema
.
example
?~
toJSON
defaultHyperdataFrame
getHyperdataFrameContents
::
HyperdataFrame
->
IO
Text
getHyperdataFrameContents
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
})
=
do
let
path
=
T
.
concat
[
_hf_base
,
"/"
,
_hf_frame_id
,
"/download"
]
r
<-
Wreq
.
get
$
T
.
unpack
path
pure
$
decodeUtf8
$
toStrict
$
r
^.
Wreq
.
responseBody
src/Gargantext/Database/Query/Table/Node.hs
View file @
d5a4b1f9
...
...
@@ -136,6 +136,24 @@ getClosestParentIdByType nId nType = do
WHERE n1.id = ? AND 0 = ?;
|]
-- | Given a node id, find all it's children (no matter how deep) of
-- given node type.
getChildrenByType
::
HasDBid
NodeType
=>
NodeId
->
NodeType
->
Cmd
err
[
NodeId
]
getChildrenByType
nId
nType
=
do
result
<-
runPGSQuery
query
(
nId
,
0
::
Int
)
children_lst
<-
mapM
(
\
(
id
,
_
)
->
getChildrenByType
id
nType
)
result
pure
$
concat
$
[
fst
<$>
filter
(
\
(
_
,
pTypename
)
->
pTypename
==
toDBid
nType
)
result
]
++
children_lst
where
query
::
DPS
.
Query
query
=
[
sql
|
SELECT n.id, n.typename
FROM nodes n
WHERE n.parent_id = ? AND 0 = ?;
|]
------------------------------------------------------------------------
getDocumentsV3WithParentId
::
HasDBid
NodeType
=>
NodeId
->
Cmd
err
[
Node
HyperdataDocumentV3
]
getDocumentsV3WithParentId
n
=
runOpaQuery
$
selectNodesWith'
n
(
Just
NodeDocument
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment