Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
17109415
Commit
17109415
authored
Jan 25, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DEMO] new format for IMT Annuaire.
parent
bea2c142
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
103 additions
and
51 deletions
+103
-51
IMTUser.hs
src/Gargantext/Core/Ext/IMTUser.hs
+92
-39
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+11
-12
No files found.
src/Gargantext/Core/Ext/IMTUser.hs
View file @
17109415
...
...
@@ -13,72 +13,125 @@ Here is written a common interface.
-}
module
Gargantext.Core.Ext.IMTUser
(
deserialiseImtUsersFromFile
)
module
Gargantext.Core.Ext.IMTUser
--
(deserialiseImtUsersFromFile)
where
import
Codec.Serialise
import
Data.Csv
import
Data.Either
import
Data.Maybe
(
catMaybes
)
import
Data.Text
(
Text
)
import
Data.Vector
(
Vector
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
import
Gargantext.Prelude
import
System.IO
(
FilePath
)
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.ByteString.Lazy
as
BL
import
Gargantext.Core.Text.Corpus.Parsers.CSV
------------------------------------------------------------------------
-- The instance body is empty: no methods are hand-written here
-- (IMTUser derives Generic in its data declaration).
instance
Serialise
IMTUser
-- | Decode the 'IMTUser' list stored in the given file (through
-- 'deserialiseFromFile'') and convert each entry to a 'HyperdataContact'
-- with 'imtUser2gargContact'.
deserialiseImtUsersFromFile :: FilePath -> IO [HyperdataContact]
deserialiseImtUsersFromFile =
  fmap (map imtUser2gargContact) . deserialiseFromFile'
-- | Read the file at @filepath@ with 'BSL.readFile' and decode its contents
-- as a list of 'IMTUser' using 'deserialise'.
deserialiseFromFile' :: FilePath -> IO [IMTUser]
deserialiseFromFile' filepath = fmap deserialise (BSL.readFile filepath)
------------------------------------------------------------------------
-- | Row type produced by 'readFile_Annuaire' and consumed by
-- 'imtUser2gargContact'; every field is textual.
-- NOTE(review): this span interleaves two revisions of the field list
-- (@id@ appears both as @Text@ and as @Maybe Text@, and @actif@ is listed
-- twice) - confirm which revision is the live one before relying on it.
data
IMTUser
=
IMTUser
{
id
::
Text
,
entite
::
Maybe
Text
,
mail
::
Maybe
Text
,
nom
::
Maybe
Text
,
prenom
::
Maybe
Text
,
fonction
::
Maybe
Text
,
tel
::
Maybe
Text
,
fax
::
Maybe
Text
,
service
::
Maybe
Text
,
groupe
::
Maybe
Text
,
bureau
::
Maybe
Text
,
url
::
Maybe
Text
,
pservice
::
Maybe
Text
,
pfonction
::
Maybe
Text
,
afonction
::
Maybe
Text
,
grprech
::
Maybe
Text
,
lieu
::
Maybe
Text
{
id
::
Maybe
Text
,
entite
::
Maybe
Text
,
mail
::
Maybe
Text
,
nom
::
Maybe
Text
,
prenom
::
Maybe
Text
,
fonction
::
Maybe
Text
,
fonction2
::
Maybe
Text
,
tel
::
Maybe
Text
,
fax
::
Maybe
Text
,
service
::
Maybe
Text
,
groupe
::
Maybe
Text
,
entite2
::
Maybe
Text
,
service2
::
Maybe
Text
,
groupe2
::
Maybe
Text
,
bureau
::
Maybe
Text
,
url
::
Maybe
Text
,
pservice
::
Maybe
Text
,
pfonction
::
Maybe
Text
,
afonction
::
Maybe
Text
,
afonction2
::
Maybe
Text
,
grprech
::
Maybe
Text
,
appellation
::
Maybe
Text
,
lieu
::
Maybe
Text
,
aprecision
::
Maybe
Text
,
atel
::
Maybe
Text
,
sexe
::
Maybe
Text
,
statut
::
Maybe
Text
,
idutilentite
::
Maybe
Text
,
entite2
::
Maybe
Text
,
service2
::
Maybe
Text
,
groupe2
::
Maybe
Text
,
actif
::
Maybe
Text
,
actif
::
Maybe
Text
,
idutilsiecoles
::
Maybe
Text
,
date_modification
::
Maybe
Text
}
deriving
(
Eq
,
Show
,
Generic
)
-- | CSV instance: an 'IMTUser' is assembled by looking up each column of the
-- named record by its name, one lookup per constructor argument.
instance FromNamedRecord IMTUser where
  parseNamedRecord r =
    IMTUser
      <$> col "id"
      <*> col "entite"
      <*> col "mail"
      <*> col "nom"
      <*> col "prenom"
      <*> col "fonction"
      <*> col "fonction2"
      <*> col "tel"
      <*> col "fax"
      <*> col "service"
      <*> col "groupe"
      <*> col "entite2"
      <*> col "service2"
      <*> col "groupe2"
      <*> col "bureau"
      <*> col "url"
      <*> col "pservice"
      <*> col "pfonction"
      <*> col "afonction"
      <*> col "afonction2"
      <*> col "grprech"
      <*> col "appellation"
      <*> col "lieu"
      <*> col "aprecision"
      <*> col "atel"
      <*> col "sexe"
      <*> col "statut"
      <*> col "idutilentite"
      <*> col "actif"
      <*> col "idutilsiecoles"
      <*> col "date_modification"
    where
      -- Look up one named column in the record being parsed.
      col name = r .: name
-- | Column names of the annuaire CSV, listed in the same order in which the
-- 'FromNamedRecord' instance of 'IMTUser' reads them.
headerCSVannuaire :: Header
headerCSVannuaire = header columns
  where
    columns =
      [ "id"
      , "entite"
      , "mail"
      , "nom"
      , "prenom"
      , "fonction"
      , "fonction2"
      , "tel"
      , "fax"
      , "service"
      , "groupe"
      , "entite2"
      , "service2"
      , "groupe2"
      , "bureau"
      , "url"
      , "pservice"
      , "pfonction"
      , "afonction"
      , "afonction2"
      , "grprech"
      , "appellation"
      , "lieu"
      , "aprecision"
      , "atel"
      , "sexe"
      , "statut"
      , "idutilentite"
      , "actif"
      , "idutilsiecoles"
      , "date_modification"
      ]
-- | Read the file at the given path with 'BL.readFile' and decode it by
-- column name (using 'csvDecodeOptions') into 'IMTUser' rows.
--
-- Returns the header together with the decoded rows. A decoding error is
-- turned into a 'panic' carrying the decoder's message.
readFile_Annuaire :: FilePath -> IO (Header, Vector IMTUser)
readFile_Annuaire path = decodeAnnuaire <$> BL.readFile path
  where
    decodeAnnuaire :: BL.ByteString -> (Header, Vector IMTUser)
    decodeAnnuaire bytes =
      case decodeByNameWith csvDecodeOptions bytes of
        Right decoded -> decoded
        Left err      -> panic (cs err)
------------------------------------------------------------------------
-- | Serialization for optimization.
-- The instance body is empty: no methods are hand-written here
-- (IMTUser derives Generic in its data declaration).
instance
Serialise
IMTUser
-- | Decode the 'IMTUser' list stored at @filepath@ (through
-- 'deserialiseFromFile'') and turn every entry into a 'HyperdataContact'
-- with 'imtUser2gargContact'.
deserialiseImtUsersFromFile :: FilePath -> IO [HyperdataContact]
deserialiseImtUsersFromFile filepath =
  fmap (map imtUser2gargContact) (deserialiseFromFile' filepath)
-- | Read the file at the given path and decode it as a list of 'IMTUser'.
--
-- Delegates to 'readFileDeserialise' from "Codec.Serialise" rather than
-- mapping the pure 'deserialise' over a lazy 'BL.readFile'. With the lazy
-- composition a malformed file only failed when the resulting list was
-- eventually forced, possibly far from this call; here the decoding failure
-- is raised as an exception by the IO action itself, so callers can catch it
-- at the call site.
deserialiseFromFile' :: FilePath -> IO [IMTUser]
deserialiseFromFile' = readFileDeserialise
------------------------------------------------------------------------
-- | Convert an 'IMTUser' into a 'HyperdataContact' labelled "IMT Annuaire":
-- the who-part is built from the id, first name, last name and service, the
-- where-part from entity, service, function, office, the fixed country
-- "France", place and the mail/phone/url contact details.
-- NOTE(review): this span interleaves two revisions of the definition (two
-- argument patterns and two @qui@ bindings are present) - confirm which
-- revision is the live one.
imtUser2gargContact
::
IMTUser
->
HyperdataContact
imtUser2gargContact
(
IMTUser
id'
entite'
mail'
nom'
prenom'
fonction'
tel'
_fax'
service'
_groupe'
bureau'
url'
_pservice'
_pfonction'
_afonction
'
_grprech'
lieu'
_aprecision'
_atel'
_sexe'
_statut'
_idutilentite'
_
entite2'
_service2'
_group2'
_
actif'
_idutilsiecoles'
date_modification'
)
imtUser2gargContact
(
IMTUser
id'
entite'
mail'
nom'
prenom'
fonction'
_fonction2'
tel'
_fax'
service'
_groupe'
_entite2
_service2
_group2
bureau'
url'
_pservice'
_pfonction'
_afonction'
_afonction2
'
_grprech'
_appellation'
lieu'
_aprecision'
_atel'
_sexe'
_statut'
_idutilentite'
_actif'
_idutilsiecoles'
date_modification'
)
=
HyperdataContact
(
Just
"IMT Annuaire"
)
(
Just
qui
)
[
ou
]
((
<>
)
<$>
(
fmap
(
\
p
->
p
<>
" "
)
prenom'
)
<*>
nom'
)
entite'
date_modification'
Nothing
Nothing
where
qui
=
ContactWho
(
Just
id'
)
prenom'
nom'
(
catMaybes
[
service'
])
[]
qui
=
ContactWho
id'
prenom'
nom'
(
catMaybes
[
service'
])
[]
ou
=
ContactWhere
(
toList
entite'
)
(
toList
service'
)
fonction'
bureau'
(
Just
"France"
)
lieu'
contact
Nothing
Nothing
contact
=
Just
$
ContactTouch
mail'
tel'
url'
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList
Nothing
=
[]
toList
(
Just
x
)
=
[
x
]
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
17109415
...
...
@@ -34,7 +34,8 @@ import Gargantext.Core.Text.Context
---------------------------------------------------------------
headerCsvGargV3
::
Header
headerCsvGargV3
=
header
[
"title"
headerCsvGargV3
=
header
[
"title"
,
"source"
,
"publication_year"
,
"publication_month"
...
...
@@ -44,9 +45,9 @@ headerCsvGargV3 = header [ "title"
]
---------------------------------------------------------------
data
CsvGargV3
=
CsvGargV3
{
d_docId
::
!
Int
,
d_title
::
!
Text
,
d_source
::
!
Text
{
d_docId
::
!
Int
,
d_title
::
!
Text
,
d_source
::
!
Text
,
d_publication_year
::
!
Int
,
d_publication_month
::
!
Int
,
d_publication_day
::
!
Int
...
...
@@ -115,14 +116,14 @@ splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
where
firstDoc
=
CsvDoc
t
s
py
pm
pd
firstAbstract
auth
firstAbstract
=
head'
"splitDoc'1"
abstracts
nextDocs
=
map
(
\
txt
->
CsvDoc
(
head'
"splitDoc'2"
$
sentences
txt
)
s
py
pm
pd
(
unsentences
$
tail'
"splitDoc'1"
$
sentences
txt
)
auth
)
(
tail'
"splitDoc'2"
abstracts
)
abstracts
=
(
splitBy
$
contextSize
)
abst
---------------------------------------------------------------
...
...
@@ -226,7 +227,6 @@ readCsvLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
Right
csvDocs
->
csvDocs
------------------------------------------------------------------------
-- | TODO use readFileLazy
--
-- Read the file at the given path with 'BL.readFile' and decode it with
-- 'readCsvHalLazyBS', yielding the header and the 'CsvHal' rows.
readCsvHal :: FilePath -> IO (Header, Vector CsvHal)
readCsvHal path = readCsvHalLazyBS <$> BL.readFile path
...
...
@@ -307,11 +307,11 @@ instance ToNamedRecord CsvHal where
toNamedRecord
(
CsvHal
t
s
py
pm
pd
abst
aut
url
isbn
iss
j
lang
doi
auth
inst
dept
lab
team
doct
)
=
namedRecord
[
"title"
.=
t
,
"source"
.=
s
,
"publication_year"
.=
py
,
"publication_month"
.=
pm
,
"publication_day"
.=
pd
,
"abstract"
.=
abst
,
"authors"
.=
aut
...
...
@@ -320,13 +320,13 @@ instance ToNamedRecord CsvHal where
,
"issue_s"
.=
iss
,
"journalPublisher_s"
.=
j
,
"language_s"
.=
lang
,
"doiId_s"
.=
doi
,
"authId_i"
.=
auth
,
"instStructId_i"
.=
inst
,
"deptStructId_i"
.=
dept
,
"labStructId_i"
.=
lab
,
"rteamStructId_i"
.=
team
,
"docType_s"
.=
doct
]
...
...
@@ -389,7 +389,6 @@ parseHal' :: BL.ByteString -> [HyperdataDocument]
-- Decode the bytes with readCsvHalLazyBS, drop the header, and convert every
-- row with csvHal2doc, returning the results as a list.
parseHal' bytes =
  V.toList (V.map csvHal2doc (snd (readCsvHalLazyBS bytes)))
------------------------------------------------------------------------
-- | Read the CSV file at @fp@ with 'readFile', drop the header, and convert
-- every row with 'csv2doc', returning the documents as a list.
parseCsv :: FilePath -> IO [HyperdataDocument]
parseCsv fp = V.toList . V.map csv2doc . snd <$> readFile fp
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment