Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
b47bbd5f
Verified
Commit
b47bbd5f
authored
Oct 21, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[tsv] separate tsvIsidore format, some refactoring
parent
2ceb742b
Pipeline
#7988
passed with stages
in 49 minutes and 24 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
143 additions
and
85 deletions
+143
-85
gargantext.cabal
gargantext.cabal
+1
-0
Export.hs
src/Gargantext/API/Node/Document/Export.hs
+41
-39
Types.hs
src/Gargantext/API/Node/Document/Export/Types.hs
+4
-22
Remote.hs
src/Gargantext/API/Server/Named/Remote.hs
+2
-2
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+2
-2
TSVv3.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV/TSVv3.hs
+16
-18
TsvIsidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV/TsvIsidore.hs
+66
-0
Utils.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV/Utils.hs
+11
-2
No files found.
gargantext.cabal
View file @
b47bbd5f
...
@@ -234,6 +234,7 @@ library
...
@@ -234,6 +234,7 @@ library
Gargantext.Core.Text.Corpus.Parsers.TSV.TSVv3
Gargantext.Core.Text.Corpus.Parsers.TSV.TSVv3
Gargantext.Core.Text.Corpus.Parsers.TSV.TSVv4
Gargantext.Core.Text.Corpus.Parsers.TSV.TSVv4
Gargantext.Core.Text.Corpus.Parsers.TSV.TsvHal
Gargantext.Core.Text.Corpus.Parsers.TSV.TsvHal
Gargantext.Core.Text.Corpus.Parsers.TSV.TsvIsidore
Gargantext.Core.Text.Corpus.Parsers.TSV.TsvPhylo
Gargantext.Core.Text.Corpus.Parsers.TSV.TsvPhylo
Gargantext.Core.Text.Corpus.Parsers.TSV.Types
Gargantext.Core.Text.Corpus.Parsers.TSV.Types
Gargantext.Core.Text.Corpus.Parsers.TSV.Utils
Gargantext.Core.Text.Corpus.Parsers.TSV.Utils
...
...
src/Gargantext/API/Node/Document/Export.hs
View file @
b47bbd5f
...
@@ -11,15 +11,12 @@ Portability : POSIX
...
@@ -11,15 +11,12 @@ Portability : POSIX
module
Gargantext.API.Node.Document.Export
module
Gargantext.API.Node.Document.Export
(
documentExportAPI
(
documentExportAPI
-- * Internals
-- * Internals
,
get
_document_json
,
get
DocumentExport
)
)
where
where
import
Control.Lens
(
view
)
import
Control.Lens
(
view
)
import
Data.ByteString.Lazy.Char8
qualified
as
BSC
import
Data.Csv
(
encodeDefaultOrderedByNameWith
,
defaultEncodeOptions
,
encDelimiter
,
encQuoting
,
Quoting
(
..
))
import
Data.Text
qualified
as
T
import
Data.Text
qualified
as
T
import
Data.Text.Encoding
qualified
as
TE
import
Data.Time.Clock.System
(
getSystemTime
,
systemSeconds
)
import
Data.Time.Clock.System
(
getSystemTime
,
systemSeconds
)
import
Data.Time.LocalTime
(
getCurrentTimeZone
,
TimeZone
(
timeZoneMinutes
))
import
Data.Time.LocalTime
(
getCurrentTimeZone
,
TimeZone
(
timeZoneMinutes
))
import
Data.Version
(
showVersion
)
import
Data.Version
(
showVersion
)
...
@@ -27,6 +24,8 @@ import Gargantext.API.Node.Document.Export.Types
...
@@ -27,6 +24,8 @@ import Gargantext.API.Node.Document.Export.Types
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.API.Routes.Named.Document
qualified
as
Named
import
Gargantext.API.Routes.Named.Document
qualified
as
Named
import
Gargantext.Core
(
toDBid
)
import
Gargantext.Core
(
toDBid
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV.TSVv3
qualified
as
TSVv3
import
Gargantext.Core.Text.Corpus.Parsers.TSV.Utils
(
writeDocs2TsvOrdered
)
import
Gargantext.Database.Admin.Types.Node
(
DocId
,
NodeId
,
NodeType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
DocId
,
NodeId
,
NodeType
(
..
))
import
Gargantext.Database.Prelude
import
Gargantext.Database.Prelude
import
Gargantext.Database.Query.Facet
(
runViewDocuments
,
Facet
(
..
))
import
Gargantext.Database.Query.Facet
(
runViewDocuments
,
Facet
(
..
))
...
@@ -35,7 +34,7 @@ import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
...
@@ -35,7 +34,7 @@ import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
),
node_user_id
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
),
node_user_id
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Paths_gargantext
qualified
as
PG
-- cabal magic build module
import
Paths_gargantext
qualified
as
PG
-- cabal magic build module
import
Servant
(
addHeader
,
Header
,
Headers
(
getResponse
)
)
import
Servant
(
addHeader
,
Header
,
Headers
)
import
Servant.Server.Generic
(
AsServerT
)
import
Servant.Server.Generic
(
AsServerT
)
documentExportAPI
::
IsGargServer
env
err
m
documentExportAPI
::
IsGargServer
env
err
m
...
@@ -57,50 +56,21 @@ getDocumentsJSON :: IsGargServer env err m
...
@@ -57,50 +56,21 @@ getDocumentsJSON :: IsGargServer env err m
->
DocId
->
DocId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
DocumentExport
)
->
m
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
DocumentExport
)
getDocumentsJSON
nodeUserId
pId
=
do
getDocumentsJSON
nodeUserId
pId
=
do
dexp
<-
get
_document_json
nodeUserId
pId
dexp
<-
get
DocumentExport
nodeUserId
pId
pure
$
addHeader
(
T
.
concat
[
"attachment; filename="
pure
$
addHeader
(
T
.
concat
[
"attachment; filename="
,
"GarganText_DocsList-"
,
"GarganText_DocsList-"
,
T
.
pack
(
show
pId
)
,
T
.
pack
(
show
pId
)
,
".json"
])
dexp
,
".json"
])
dexp
get_document_json
::
IsGargServer
err
env
m
=>
NodeId
->
DocId
->
m
DocumentExport
get_document_json
nodeUserId
pId
=
do
runDBQuery
$
do
uId
<-
view
node_user_id
<$>
getNodeUser
nodeUserId
mcId
<-
getClosestParentIdByType
pId
NodeCorpus
let
cId
=
maybe
(
panicTrace
"[G.A.N.D.Export] Node has no parent"
)
identity
mcId
docs
<-
runViewDocuments
cId
False
Nothing
Nothing
Nothing
Nothing
Nothing
pure
DocumentExport
{
_de_documents
=
mapFacetDoc
uId
<$>
docs
,
_de_garg_version
=
T
.
pack
$
showVersion
PG
.
version
}
where
mapFacetDoc
uId
(
FacetDoc
{
..
})
=
Document
{
_d_document
=
Node
{
_node_id
=
facetDoc_id
,
_node_hash_id
=
Nothing
,
_node_typename
=
toDBid
NodeDocument
,
_node_user_id
=
uId
,
_node_parent_id
=
Nothing
,
_node_name
=
facetDoc_title
,
_node_date
=
facetDoc_created
,
_node_hyperdata
=
facetDoc_hyperdata
}
,
_d_ngrams
=
Ngrams
{
_ng_ngrams
=
[]
,
_ng_hash
=
""
}
,
_d_hash
=
""
}
_mapDoc
d
=
Document
{
_d_document
=
d
,
_d_ngrams
=
Ngrams
{
_ng_ngrams
=
[]
,
_ng_hash
=
""
}
,
_d_hash
=
""
}
getDocumentsJSONZip
::
IsGargServer
env
err
m
getDocumentsJSONZip
::
IsGargServer
env
err
m
=>
NodeId
=>
NodeId
-- ^ The Node ID of the target user
-- ^ The Node ID of the target user
->
DocId
->
DocId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
DocumentExportZIP
)
-- [Document]
->
m
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
DocumentExportZIP
)
-- [Document]
getDocumentsJSONZip
userNodeId
pId
=
do
getDocumentsJSONZip
userNodeId
pId
=
do
d
JSON
<-
getDocumentsJSON
userNodeId
pId
d
exp
<-
getDocumentExport
userNodeId
pId
systime
<-
liftBase
getSystemTime
systime
<-
liftBase
getSystemTime
tz
<-
liftBase
getCurrentTimeZone
tz
<-
liftBase
getCurrentTimeZone
let
dexp
=
getResponse
dJSON
let
dexpz
=
DocumentExportZIP
{
_dez_dexp
=
dexp
let
dexpz
=
DocumentExportZIP
{
_dez_dexp
=
dexp
,
_dez_doc_id
=
pId
,
_dez_doc_id
=
pId
-- see https://github.com/jgm/zip-archive/commit/efe4423a9a2b1dc2a4d413917a933828d3f8dc0f
-- see https://github.com/jgm/zip-archive/commit/efe4423a9a2b1dc2a4d413917a933828d3f8dc0f
...
@@ -116,11 +86,43 @@ getDocumentsTSV :: IsGargServer err env m
...
@@ -116,11 +86,43 @@ getDocumentsTSV :: IsGargServer err env m
->
DocId
->
DocId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
T
.
Text
)
-- [Document]
->
m
(
Headers
'[
H
eader
"Content-Disposition"
T
.
Text
]
T
.
Text
)
-- [Document]
getDocumentsTSV
userNodeId
pId
=
do
getDocumentsTSV
userNodeId
pId
=
do
dJSON
<-
getDocumentsJSON
userNodeId
pId
dexp
<-
getDocumentExport
userNodeId
pId
let
DocumentExport
{
_de_documents
}
=
getResponse
dJSON
let
ret
=
writeDocs2TsvOrdered
TSVv3
.
doc2tsv
(
deHyperdata
dexp
)
let
ret
=
TE
.
decodeUtf8
$
BSC
.
toStrict
$
encodeDefaultOrderedByNameWith
(
defaultEncodeOptions
{
encDelimiter
=
fromIntegral
$
ord
'
\t
'
,
encQuoting
=
QuoteAll
})
_de_documents
pure
$
addHeader
(
T
.
concat
[
"attachment; filename=GarganText_DocsList-"
pure
$
addHeader
(
T
.
concat
[
"attachment; filename=GarganText_DocsList-"
,
T
.
pack
$
show
pId
,
T
.
pack
$
show
pId
,
".tsv"
])
,
".tsv"
])
ret
ret
getDocumentExport
::
IsGargServer
err
env
m
=>
NodeId
->
DocId
->
m
DocumentExport
getDocumentExport
nodeUserId
pId
=
do
runDBQuery
$
do
uId
<-
view
node_user_id
<$>
getNodeUser
nodeUserId
mcId
<-
getClosestParentIdByType
pId
NodeCorpus
let
cId
=
maybe
(
panicTrace
"[G.A.N.D.Export] Node has no parent"
)
identity
mcId
docs
<-
runViewDocuments
cId
False
Nothing
Nothing
Nothing
Nothing
Nothing
pure
DocumentExport
{
_de_documents
=
mapFacetDoc
uId
<$>
docs
,
_de_garg_version
=
T
.
pack
$
showVersion
PG
.
version
}
where
mapFacetDoc
uId
(
FacetDoc
{
..
})
=
Document
{
_d_document
=
Node
{
_node_id
=
facetDoc_id
,
_node_hash_id
=
Nothing
,
_node_typename
=
toDBid
NodeDocument
,
_node_user_id
=
uId
,
_node_parent_id
=
Nothing
,
_node_name
=
facetDoc_title
,
_node_date
=
facetDoc_created
,
_node_hyperdata
=
facetDoc_hyperdata
}
,
_d_ngrams
=
Ngrams
{
_ng_ngrams
=
[]
,
_ng_hash
=
""
}
,
_d_hash
=
""
}
_mapDoc
d
=
Document
{
_d_document
=
d
,
_d_ngrams
=
Ngrams
{
_ng_ngrams
=
[]
,
_ng_hash
=
""
}
,
_d_hash
=
""
}
src/Gargantext/API/Node/Document/Export/Types.hs
View file @
b47bbd5f
...
@@ -16,9 +16,7 @@ module Gargantext.API.Node.Document.Export.Types where
...
@@ -16,9 +16,7 @@ module Gargantext.API.Node.Document.Export.Types where
import
Codec.Serialise.Class
hiding
(
encode
)
import
Codec.Serialise.Class
hiding
(
encode
)
import
Data.Aeson
(
encode
)
import
Data.Aeson
(
encode
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Csv
(
DefaultOrdered
(
..
),
ToNamedRecord
(
..
),
(
.=
),
header
,
namedRecord
)
import
Data.Swagger
(
genericDeclareNamedSchema
,
ToParamSchema
(
..
),
ToSchema
(
..
)
)
import
Data.Swagger
(
genericDeclareNamedSchema
,
ToParamSchema
(
..
),
ToSchema
(
..
)
)
import
Data.Text.Encoding
qualified
as
TE
import
Data.Text
qualified
as
T
import
Data.Text
qualified
as
T
import
Gargantext.Core.Types
(
Node
,
TODO
)
import
Gargantext.Core.Types
(
Node
,
TODO
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
...
@@ -40,6 +38,10 @@ data DocumentExport =
...
@@ -40,6 +38,10 @@ data DocumentExport =
instance
Serialise
DocumentExport
where
instance
Serialise
DocumentExport
where
deHyperdata
::
DocumentExport
->
[
HyperdataDocument
]
deHyperdata
(
DocumentExport
{
_de_documents
})
=
(
\
(
Document
{
_d_document
})
->
_node_hyperdata
_d_document
)
<$>
_de_documents
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
data
DocumentExportZIP
=
data
DocumentExportZIP
=
DocumentExportZIP
{
_dez_dexp
::
DocumentExport
DocumentExportZIP
{
_dez_dexp
::
DocumentExport
...
@@ -60,26 +62,6 @@ instance Show Document where
...
@@ -60,26 +62,6 @@ instance Show Document where
show
(
Document
_
_
h1
)
=
"Document "
<>
Prelude
.
show
h1
show
(
Document
_
_
h1
)
=
"Document "
<>
Prelude
.
show
h1
instance
Serialise
Document
where
instance
Serialise
Document
where
--instance Read Document where
-- read "" = panic "not implemented"
instance
DefaultOrdered
Document
where
headerOrder
_
=
header
[
"Publication Day"
,
"Publication Month"
,
"Publication Year"
,
"Authors"
,
"Title"
,
"Source"
,
"Abstract"
]
instance
ToNamedRecord
Document
where
toNamedRecord
(
Document
{
_d_document
=
Node
{
..
}})
=
namedRecord
[
"Publication Day"
.=
_hd_publication_day
_node_hyperdata
,
"Publication Month"
.=
_hd_publication_month
_node_hyperdata
,
"Publication Year"
.=
_hd_publication_year
_node_hyperdata
,
"Authors"
.=
_hd_authors
_node_hyperdata
,
"Title"
.=
_hd_title
_node_hyperdata
,
"Source"
.=
(
TE
.
encodeUtf8
<$>
_hd_source
_node_hyperdata
)
,
"Abstract"
.=
(
TE
.
encodeUtf8
<$>
_hd_abstract
_node_hyperdata
)
]
data
Ngrams
=
data
Ngrams
=
Ngrams
{
_ng_ngrams
::
[
Text
]
Ngrams
{
_ng_ngrams
::
[
Text
]
...
...
src/Gargantext/API/Server/Named/Remote.hs
View file @
b47bbd5f
...
@@ -37,7 +37,7 @@ import Gargantext.API.Auth.PolicyCheck (remoteExportChecks)
...
@@ -37,7 +37,7 @@ import Gargantext.API.Auth.PolicyCheck (remoteExportChecks)
import
Gargantext.API.Errors.Types
import
Gargantext.API.Errors.Types
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.Types
(
NgramsList
)
import
Gargantext.API.Ngrams.Types
(
NgramsList
)
import
Gargantext.API.Node.Document.Export
(
get
_document_json
)
import
Gargantext.API.Node.Document.Export
(
get
DocumentExport
)
import
Gargantext.API.Node.Document.Export.Types
(
DocumentExport
(
_de_documents
)
)
import
Gargantext.API.Node.Document.Export.Types
(
DocumentExport
(
_de_documents
)
)
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.API.Routes.Client
(
remoteImportClient
)
import
Gargantext.API.Routes.Client
(
remoteImportClient
)
...
@@ -239,7 +239,7 @@ makeExportable userNodeId (TreeN x xs)
...
@@ -239,7 +239,7 @@ makeExportable userNodeId (TreeN x xs)
NodeCorpus
->
EN_corpus
<$>
pure
x
NodeCorpus
->
EN_corpus
<$>
pure
x
NodeGraph
->
EN_graph
<$>
pure
x
NodeGraph
->
EN_graph
<$>
pure
x
NodePhylo
->
EN_phylo
<$>
pure
x
NodePhylo
->
EN_phylo
<$>
pure
x
NodeTexts
->
EN_document
<$>
pure
x
<*>
get
_document_json
userNodeId
(
_node_id
x
)
NodeTexts
->
EN_document
<$>
pure
x
<*>
get
DocumentExport
userNodeId
(
_node_id
x
)
NodeList
->
EN_terms
<$>
pure
x
<*>
runDBQuery
(
getNgramsList
env
(
_node_id
x
))
NodeList
->
EN_terms
<$>
pure
x
<*>
runDBQuery
(
getNgramsList
env
(
_node_id
x
))
Notes
->
case
JS
.
parseMaybe
JS
.
parseJSON
(
_node_hyperdata
x
)
of
Notes
->
case
JS
.
parseMaybe
JS
.
parseJSON
(
_node_hyperdata
x
)
of
Nothing
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
b47bbd5f
...
@@ -22,7 +22,7 @@ import Data.Text qualified as Text
...
@@ -22,7 +22,7 @@ import Data.Text qualified as Text
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
cleanText
)
import
Gargantext.Core.Text.Corpus.Parsers
(
cleanText
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
(
writeDocs2Tsv
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
(
writeDocs2Tsv
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV.T
SVv3
qualified
as
TSVv3
import
Gargantext.Core.Text.Corpus.Parsers.TSV.T
svIsidore
qualified
as
TsvIsidore
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Defaults
qualified
as
Defaults
...
@@ -55,7 +55,7 @@ isidore2tsvFile :: FilePath -> Lang -> Maybe Isidore.Limit
...
@@ -55,7 +55,7 @@ isidore2tsvFile :: FilePath -> Lang -> Maybe Isidore.Limit
->
IO
()
->
IO
()
isidore2tsvFile
fp
lang
li
tq
aq
=
do
isidore2tsvFile
fp
lang
li
tq
aq
=
do
hdocs
<-
get
lang
li
tq
aq
hdocs
<-
get
lang
li
tq
aq
writeDocs2Tsv
T
SVv3
.
headerTsvGargV3
TSVv3
.
doc2tsv
fp
hdocs
writeDocs2Tsv
T
svIsidore
.
headerIsidore
TsvIsidore
.
doc2tsv
fp
hdocs
isidoreToDoc
::
Lang
->
IsidoreDoc
->
IO
HyperdataDocument
isidoreToDoc
::
Lang
->
IsidoreDoc
->
IO
HyperdataDocument
isidoreToDoc
lang
(
IsidoreDoc
t
a
d
u
s
as
)
=
do
isidoreToDoc
lang
(
IsidoreDoc
t
a
d
u
s
as
)
=
do
...
...
src/Gargantext/Core/Text/Corpus/Parsers/TSV/TSVv3.hs
View file @
b47bbd5f
...
@@ -20,17 +20,6 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..
...
@@ -20,17 +20,6 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..
import
Gargantext.Prelude
import
Gargantext.Prelude
headerTsvGargV3
::
Header
headerTsvGargV3
=
header
[
"title"
,
"source"
,
"publication_year"
,
"publication_month"
,
"publication_day"
,
"abstract"
,
"authors"
]
data
TsvGargV3
=
TsvGargV3
data
TsvGargV3
=
TsvGargV3
{
d_docId
::
!
Int
{
d_docId
::
!
Int
,
d_title
::
!
Text
,
d_title
::
!
Text
...
@@ -97,15 +86,24 @@ instance FromNamedRecord TsvDoc where
...
@@ -97,15 +86,24 @@ instance FromNamedRecord TsvDoc where
tsv_authors
<-
r
.:
"authors"
<|>
r
.:
"Authors"
tsv_authors
<-
r
.:
"authors"
<|>
r
.:
"Authors"
pure
$
TsvDoc
{
..
}
pure
$
TsvDoc
{
..
}
instance
DefaultOrdered
TsvDoc
where
headerOrder
_
=
header
[
"Publication Day"
,
"Publication Month"
,
"Publication Year"
,
"Authors"
,
"Title"
,
"Source"
,
"Abstract"
]
instance
ToNamedRecord
TsvDoc
where
instance
ToNamedRecord
TsvDoc
where
toNamedRecord
(
TsvDoc
{
..
})
=
toNamedRecord
(
TsvDoc
{
..
})
=
namedRecord
[
"
t
itle"
.=
tsv_title
namedRecord
[
"
T
itle"
.=
tsv_title
,
"
s
ource"
.=
tsv_source
,
"
S
ource"
.=
tsv_source
,
"
publication_y
ear"
.=
tsv_publication_year
,
"
Publication Y
ear"
.=
tsv_publication_year
,
"
publication_m
onth"
.=
tsv_publication_month
,
"
Publication M
onth"
.=
tsv_publication_month
,
"
publication_d
ay"
.=
tsv_publication_day
,
"
Publication D
ay"
.=
tsv_publication_day
,
"
a
bstract"
.=
tsv_abstract
,
"
A
bstract"
.=
tsv_abstract
,
"
a
uthors"
.=
tsv_authors
,
"
A
uthors"
.=
tsv_authors
]
]
...
...
src/Gargantext/Core/Text/Corpus/Parsers/TSV/TsvIsidore.hs
0 → 100644
View file @
b47bbd5f
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.TSV.TsvIsidore
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Gargantext.Core.Text.Corpus.Parsers.TSV.TsvIsidore
where
import
Data.Csv
import
Gargantext.Core.Text.Corpus.Parsers.TSV.IntOrDec
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Prelude
headerIsidore
::
Header
headerIsidore
=
header
[
"title"
,
"source"
,
"publication_year"
,
"publication_month"
,
"publication_day"
,
"abstract"
,
"authors"
]
data
TsvDoc
=
TsvDoc
{
tsv_title
::
!
Text
,
tsv_source
::
!
Text
,
tsv_publication_year
::
!
(
Maybe
IntOrDec
)
,
tsv_publication_month
::
!
(
Maybe
Int
)
,
tsv_publication_day
::
!
(
Maybe
Int
)
,
tsv_abstract
::
!
Text
,
tsv_authors
::
!
Text
}
deriving
(
Show
)
instance
ToNamedRecord
TsvDoc
where
toNamedRecord
(
TsvDoc
{
..
})
=
namedRecord
[
"title"
.=
tsv_title
,
"source"
.=
tsv_source
,
"publication_year"
.=
tsv_publication_year
,
"publication_month"
.=
tsv_publication_month
,
"publication_day"
.=
tsv_publication_day
,
"abstract"
.=
tsv_abstract
,
"authors"
.=
tsv_authors
]
doc2tsv
::
HyperdataDocument
->
TsvDoc
doc2tsv
h
=
TsvDoc
{
tsv_title
=
m
$
_hd_title
h
,
tsv_source
=
m
$
_hd_source
h
,
tsv_publication_year
=
Just
$
IntOrDec
$
mI
$
_hd_publication_year
h
,
tsv_publication_month
=
Just
$
mI
$
_hd_publication_month
h
,
tsv_publication_day
=
Just
$
mI
$
_hd_publication_day
h
,
tsv_abstract
=
m
$
_hd_abstract
h
,
tsv_authors
=
m
$
_hd_authors
h
}
where
m
=
maybe
""
identity
mI
=
maybe
0
identity
src/Gargantext/Core/Text/Corpus/Parsers/TSV/Utils.hs
View file @
b47bbd5f
...
@@ -15,6 +15,8 @@ import Data.ByteString.Lazy qualified as BL
...
@@ -15,6 +15,8 @@ import Data.ByteString.Lazy qualified as BL
import
Data.ByteString.Lazy.Char8
qualified
as
B8L
import
Data.ByteString.Lazy.Char8
qualified
as
B8L
import
Data.Csv
import
Data.Csv
import
Data.Text
(
pack
)
import
Data.Text
(
pack
)
import
Data.Text
qualified
as
T
import
Data.Text.Encoding
qualified
as
TE
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
import
Data.Vector
qualified
as
V
import
Data.Vector
qualified
as
V
import
Gargantext.Core.Text.Corpus.Parsers.TSV.Types
import
Gargantext.Core.Text.Corpus.Parsers.TSV.Types
...
@@ -82,7 +84,14 @@ writeDocs2Tsv :: ToNamedRecord tsvDoc
...
@@ -82,7 +84,14 @@ writeDocs2Tsv :: ToNamedRecord tsvDoc
->
FilePath
->
FilePath
->
[
doc
]
->
[
doc
]
->
IO
()
->
IO
()
writeDocs2Tsv
hdr
doc2tsv
fp
h
s
=
writeDocs2Tsv
hdr
doc2tsv
fp
doc
s
=
BL
.
writeFile
fp
$
encodeByNameWith
(
tsvEncodeOptions
Tab
)
hdr
(
doc2tsv
<$>
h
s
)
BL
.
writeFile
fp
$
encodeByNameWith
(
tsvEncodeOptions
Tab
)
hdr
(
doc2tsv
<$>
doc
s
)
writeDocs2TsvOrdered
::
(
ToNamedRecord
tsvDoc
,
DefaultOrdered
tsvDoc
)
=>
(
doc
->
tsvDoc
)
->
[
doc
]
->
T
.
Text
writeDocs2TsvOrdered
doc2tsv
docs
=
TE
.
decodeUtf8
$
B8L
.
toStrict
$
encodeDefaultOrderedByNameWith
((
tsvEncodeOptions
Tab
)
{
encQuoting
=
QuoteAll
})
(
doc2tsv
<$>
docs
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment