Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
59fc9cfd
Commit
59fc9cfd
authored
Jan 25, 2023
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Improving NodeWriteParsing
parent
d41791b9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
36 deletions
+31
-36
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+10
-29
FrameWrite.hs
src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
+21
-7
No files found.
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
59fc9cfd
...
...
@@ -38,7 +38,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)
import
Gargantext.Database.Admin.Types.Hyperdata.Frame
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getChildrenByType
,
getClosestParentIdByType'
,
getNodeWith
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
,
node_name
)
import
Gargantext.Prelude
import
Gargantext.Utils.Jobs
(
serveJobsAPI
)
import
Servant
...
...
@@ -99,7 +99,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
pure
(
node
,
contents
)
)
frameWrites
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
7
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsedE
=
map
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
7
(
node
,
contents
))
frameWritesWithContents
-- TODO hard coded param should be taken from user
let
parsed
=
List
.
concat
$
rights
parsedE
...
...
@@ -108,34 +108,15 @@ documentsFromWriteNodes uId nId _p logStatus = do
(
Multi
EN
)
cId
Nothing
logStatus
pure
$
jobLogSuccess
jobLog
------------------------------------------------------------------------
{-
-- extractFrameWrites :: (HasSettings env, FlowCmdM env err m) => NodeId -> m [Node T.Text]
extractFrameWrites nId = do
mcId <- getClosestParentIdByType' nId NodeCorpus
frameWriteIds <- getChildrenByType (fromMaybe (panic "[G.A.N.DocumentsFromWriteNodes] No parent found") mcId) NodeFrameWrite
-- https://write.frame.gargantext.org/<frame_id>/download
frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
frameWritesWithContents <- liftBase $
mapM (\node -> do
contents <- getHyperdataFrameContents (node ^. node_hyperdata)
pure (node, contents)
) frameWrites
let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite 7 (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE
pure parsed
-}
------------------------------------------------------------------------
hyperdataDocumentFromFrameWrite
::
Int
->
(
HyperdataFrame
,
T
.
Text
)
->
Either
T
.
Text
[
HyperdataDocument
]
hyperdataDocumentFromFrameWrite
paragraphSize
(
HyperdataFrame
{
_hf_base
,
_hf_frame_id
}
,
contents
)
=
hyperdataDocumentFromFrameWrite
::
Int
->
(
Node
HyperdataFrame
,
T
.
Text
)
->
Either
T
.
Text
[
HyperdataDocument
]
hyperdataDocumentFromFrameWrite
paragraphSize
(
node
,
contents
)
=
case
parseLines
contents
of
Left
_
->
Left
"Error parsing node"
Right
(
Parsed
{
authors
,
contents
=
ctxts
,
date
,
source
,
title
=
t
})
->
let
authorJoinSingle
(
Author
{
firstName
,
lastName
})
=
T
.
concat
[
lastName
,
", "
,
firstName
]
Right
(
Parsed
{
authors
,
contents
=
ctxts
,
date
})
->
let
HyperdataFrame
{
_hf_base
,
_hf_frame_id
}
=
node
^.
node_hyperdata
authorJoinSingle
(
Author
{
firstName
,
lastName
})
=
T
.
concat
[
lastName
,
", "
,
firstName
]
authors'
=
T
.
concat
$
authorJoinSingle
<$>
authors
date'
=
(
\
(
Date
{
year
,
month
,
day
})
->
T
.
concat
[
T
.
pack
$
show
year
,
"-"
,
T
.
pack
$
show
month
,
"-"
...
...
@@ -143,7 +124,7 @@ hyperdataDocumentFromFrameWrite paragraphSize (HyperdataFrame { _hf_base, _hf_fr
year'
=
fromIntegral
$
maybe
Defaults
.
year
(
\
(
Date
{
year
})
->
year
)
date
month'
=
maybe
Defaults
.
month
(
\
(
Date
{
month
})
->
fromIntegral
month
)
date
day'
=
maybe
Defaults
.
day
(
\
(
Date
{
day
})
->
fromIntegral
day
)
date
in
Right
(
List
.
map
(
\
ctxt
->
HyperdataDocument
{
_hd_bdd
=
Just
"FrameWrite"
Right
(
List
.
map
(
\
(
t
,
ctxt
)
->
HyperdataDocument
{
_hd_bdd
=
Just
"FrameWrite"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
...
...
@@ -152,7 +133,7 @@ hyperdataDocumentFromFrameWrite paragraphSize (HyperdataFrame { _hf_base, _hf_fr
,
_hd_title
=
Just
t
,
_hd_authors
=
Just
authors'
,
_hd_institutes
=
Nothing
,
_hd_source
=
sourc
e
,
_hd_source
=
Just
$
node
^.
node_nam
e
,
_hd_abstract
=
Just
ctxt
,
_hd_publication_date
=
date'
,
_hd_publication_year
=
Just
year'
...
...
@@ -162,5 +143,5 @@ hyperdataDocumentFromFrameWrite paragraphSize (HyperdataFrame { _hf_base, _hf_fr
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
EN
}
)
(
text2
p
aragraphs
paragraphSize
ctxts
)
)
(
text2
titleP
aragraphs
paragraphSize
ctxts
)
)
src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
View file @
59fc9cfd
...
...
@@ -108,25 +108,35 @@ data Line =
parseLines
::
Text
->
Either
ParseError
Parsed
parseLines
text
=
foldl
f
emptyParsed
<$>
lst
where
lst
=
parse
documentLines
P
""
(
unpack
text
)
lst
=
parse
documentLines
""
(
unpack
text
)
f
(
Parsed
{
..
})
(
LAuthors
as
)
=
Parsed
{
authors
=
as
,
..
}
f
(
Parsed
{
..
})
(
LContents
c
)
=
Parsed
{
contents
=
concat
[
contents
,
c
],
..
}
f
(
Parsed
{
..
})
(
LDate
d
)
=
Parsed
{
date
=
Just
d
,
..
}
f
(
Parsed
{
..
})
(
LSource
s
)
=
Parsed
{
source
=
Just
s
,
..
}
f
(
Parsed
{
..
})
(
LTitle
t
)
=
Parsed
{
title
=
t
,
..
}
-- Source should be the name of the node
-- First line of each Context should be the title.
documentLinesP
::
Parser
[
Line
]
documentLinesP
=
do
t
<-
titleP
ls
<-
lineP
`
sepBy
`
newline
pure
$
[
LTitle
$
pack
t
]
++
ls
documentLines
::
Parser
[
Line
]
documentLines
=
do
ls
<-
lineP
`
sepBy
`
newline
pure
ls
lineP
::
Parser
Line
lineP
=
do
choice
[
try
authorsLineP
,
try
dateLineP
,
try
sourceLineP
,
contentsLineP
]
,
contentsLineP
]
authorsLineP
::
Parser
Line
authorsLineP
=
do
...
...
@@ -167,6 +177,7 @@ titleDelimiterP = do
_
<-
newline
-- _ <- try (string "==")
pure
()
titleP
::
Parser
[
Char
]
titleP
=
manyTill
anyChar
(
try
titleDelimiterP
)
...
...
@@ -223,11 +234,14 @@ tokenEnd :: Parser ()
tokenEnd
=
void
(
char
'
\n
'
)
<|>
eof
--- MISC Tools
text2paragraphs
::
Int
->
Text
->
[
Text
]
text2paragraphs
n
=
List
.
map
DT
.
concat
.
splitEvery
n
.
List
.
map
clean
.
sentences
.
DT
.
concat
.
DT
.
lines
text2titleParagraphs
::
Int
->
Text
->
[(
Text
,
Text
)]
text2titleParagraphs
n
=
catMaybes
.
List
.
map
doTitle
.
splitEvery
n
.
List
.
map
clean
.
sentences
.
DT
.
concat
.
DT
.
lines
doTitle
::
[
Text
]
->
Maybe
(
Text
,
Text
)
doTitle
(
t
:
ts
)
=
Just
(
t
,
DT
.
concat
ts
)
doTitle
[]
=
Nothing
clean
::
Text
->
Text
clean
=
DT
.
unwords
.
List
.
filter
(
\
w
->
DT
.
length
w
<
25
)
.
DT
.
words
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment