Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
cf6fbff6
Verified
Commit
cf6fbff6
authored
Sep 13, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactoring] more record syntax refactoring
parent
37a36aba
Pipeline
#1802
passed with stage
in 33 minutes and 36 seconds
Changes
14
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
333 additions
and
296 deletions
+333
-296
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+19
-19
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+20
-19
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+19
-19
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+19
-19
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+168
-156
GrandDebat.hs
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
+20
-12
Isidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
+19
-12
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+8
-2
RIS.hs
src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
+2
-0
Presse.hs
src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
+0
-2
Wikimedia.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
+4
-2
List.hs
src/Gargantext/Core/Text/List.hs
+20
-20
WithStem.hs
src/Gargantext/Core/Text/List/Group/WithStem.hs
+2
-2
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+13
-12
No files found.
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
cf6fbff6
...
...
@@ -31,23 +31,23 @@ get la q ml = do
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
Just
d
)
pure
$
HyperdataDocument
(
Just
"Hal"
)
(
Just
$
pack
$
show
i
)
Nothing
Nothing
Nothing
Nothing
(
Just
$
intercalate
" "
t
)
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
)
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
)
(
Just
$
maybe
"Nothing"
identity
s
)
(
Just
$
intercalate
" "
ab
)
(
fmap
(
pack
.
show
)
utctime
)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(
Just
$
(
pack
.
show
)
la
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
,
_hd_doi
=
Just
$
pack
$
show
i
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
$
intercalate
" "
t
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
s
,
_hd_abstract
=
Just
$
intercalate
" "
ab
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
cf6fbff6
...
...
@@ -39,22 +39,23 @@ toDoc' la docs' = do
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
(
Just
.
pack
.
show
)
d
)
pure
$
HyperdataDocument
(
Just
"Istex"
)
(
Just
i
)
Nothing
Nothing
Nothing
Nothing
t
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
))
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
))
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
))
ab
(
fmap
(
pack
.
show
)
utctime
)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(
Just
$
(
pack
.
show
)
la
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
)
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
)
,
_hd_source
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
)
,
_hd_abstract
=
ab
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
cf6fbff6
...
...
@@ -38,25 +38,25 @@ get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
(
PubMedDoc
.
PubMedDate
a
y
m
d
)
)
=
HyperdataDocument
(
Just
"PubMed"
)
Nothing
Nothing
Nothing
Nothing
Nothing
t
(
authors
aus
)
(
institutes
aus
)
j
(
abstract
as
)
(
Just
$
Text
.
pack
$
show
a
)
(
Just
$
fromIntegral
y
)
(
Just
m
)
(
Just
d
)
Nothing
Nothing
Nothing
(
Just
$
(
Text
.
pack
.
show
)
l
)
)
=
HyperdataDocument
{
_hd_bdd
=
Just
"PubMed"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
,
_hd_authors
=
authors
aus
,
_hd_institutes
=
institutes
aus
,
_hd_source
=
j
,
_hd_abstract
=
abstract
as
,
_hd_publication_date
=
Just
$
Text
.
pack
$
show
a
,
_hd_publication_year
=
Just
$
fromIntegral
y
,
_hd_publication_month
=
Just
m
,
_hd_publication_day
=
Just
d
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
where
authors
::
Maybe
[
PubMedDoc
.
Author
]
->
Maybe
Text
authors
aus'
=
case
aus'
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
cf6fbff6
...
...
@@ -122,25 +122,25 @@ toDoc ff d = do
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
pure
$
HyperdataDocument
(
Just
$
DT
.
pack
$
show
ff
)
(
lookup
"doi"
d
)
(
lookup
"URL"
d
)
Nothing
Nothing
Nothing
(
lookup
"title"
d
)
Nothing
(
lookup
"authors"
d
)
(
lookup
"source"
d
)
(
lookup
"abstract"
d
)
(
fmap
(
DT
.
pack
.
show
)
utcTime
)
(
pub_year
)
(
pub_month
)
(
pub_day
)
Nothing
Nothing
Nothing
(
Just
$
(
DT
.
pack
.
show
)
lang
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
,
_hd_url
=
lookup
"URL"
d
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
lookup
"title"
d
,
_hd_authors
=
Nothing
,
_hd_institutes
=
lookup
"authors"
d
,
_hd_source
=
lookup
"source"
d
,
_hd_abstract
=
lookup
"abstract"
d
,
_hd_publication_date
=
fmap
(
DT
.
pack
.
show
)
utcTime
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
enrichWith
::
FileFormat
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
cf6fbff6
...
...
@@ -62,34 +62,39 @@ data CsvGargV3 = CsvGargV3
-- | Doc 2 HyperdataDocument
toDoc
::
CsvGargV3
->
HyperdataDocument
toDoc
(
CsvGargV3
did
dt
_
dpy
dpm
dpd
dab
dau
)
=
HyperdataDocument
(
Just
"CSV"
)
(
Just
.
pack
.
show
$
did
)
Nothing
Nothing
Nothing
Nothing
(
Just
dt
)
Nothing
(
Just
dau
)
(
Just
dab
)
(
Nothing
)
Nothing
(
Just
dpy
)
(
Just
dpm
)
(
Just
dpd
)
Nothing
Nothing
Nothing
Nothing
HyperdataDocument
{
_hd_bdd
=
Just
"CSV"
,
_hd_doi
=
Just
.
pack
.
show
$
did
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
dt
,
_hd_authors
=
Nothing
,
_hd_institutes
=
Just
dau
,
_hd_source
=
Just
dab
,
_hd_abstract
=
Nothing
,
_hd_publication_date
=
Nothing
,
_hd_publication_year
=
Just
dpy
,
_hd_publication_month
=
Just
dpm
,
_hd_publication_day
=
Just
dpd
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
---------------------------------------------------------------
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
mPy
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
(
fromMIntOrDec
defaultYear
mPy
)
(
fromMaybe
defaultMonth
pm
)
(
fromMaybe
defaultDay
pd
)
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
{
..
})
-- (CsvDoc t s mPy pm pd abst auth)
->
CsvGargV3
{
d_docId
=
nId
,
d_title
=
csv_title
,
d_source
=
csv_source
,
d_publication_year
=
fromMIntOrDec
defaultYear
csv_publication_year
,
d_publication_month
=
fromMaybe
defaultMonth
csv_publication_month
,
d_publication_day
=
fromMaybe
defaultDay
csv_publication_day
,
d_abstract
=
csv_abstract
,
d_authors
=
csv_authors
})
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
...
...
@@ -99,7 +104,13 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
Just
$
IntOrDec
py
)
(
Just
pm
)
(
Just
pd
)
abst
auth
)
fromDocs'
(
CsvGargV3
{
..
})
=
CsvDoc
{
csv_title
=
d_title
,
csv_source
=
d_source
,
csv_publication_year
=
Just
$
IntOrDec
d_publication_year
,
csv_publication_month
=
Just
d_publication_month
,
csv_publication_day
=
Just
d_publication_day
,
csv_abstract
=
d_abstract
,
csv_authors
=
d_authors
}
---------------------------------------------------------------
-- | Split a document in its context
...
...
@@ -117,19 +128,17 @@ splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
V
.
fromList
[
doc
]
where
splitDoc'
::
SplitContext
->
CsvDoc
->
Vector
CsvDoc
splitDoc'
contextSize
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
=
V
.
fromList
$
[
firstDoc
]
<>
nextDocs
splitDoc'
contextSize
(
CsvDoc
{
..
}
)
=
V
.
fromList
$
[
firstDoc
]
<>
nextDocs
where
firstDoc
=
CsvDoc
t
s
py
pm
pd
firstAbstract
auth
firstDoc
=
CsvDoc
{
csv_abstract
=
firstAbstract
,
..
}
firstAbstract
=
head'
"splitDoc'1"
abstracts
nextDocs
=
map
(
\
txt
->
CsvDoc
(
head'
"splitDoc'2"
$
sentences
txt
)
s
py
pm
pd
(
unsentences
$
tail'
"splitDoc'1"
$
sentences
txt
)
auth
nextDocs
=
map
(
\
txt
->
CsvDoc
{
csv_title
=
head'
"splitDoc'2"
$
sentences
txt
,
csv_abstract
=
unsentences
$
tail'
"splitDoc'1"
$
sentences
txt
,
..
}
)
(
tail'
"splitDoc'2"
abstracts
)
abstracts
=
(
splitBy
$
contextSize
)
abs
t
abstracts
=
(
splitBy
$
contextSize
)
csv_abstrac
t
---------------------------------------------------------------
---------------------------------------------------------------
...
...
@@ -174,33 +183,35 @@ data CsvDoc = CsvDoc
deriving
(
Show
)
instance
FromNamedRecord
CsvDoc
where
parseNamedRecord
r
=
CsvDoc
<$>
(
r
.:
"title"
<|>
r
.:
"Title"
)
<*>
(
r
.:
"source"
<|>
r
.:
"Source"
)
<*>
(
r
.:
"publication_year"
<|>
r
.:
"Publication Year"
)
<*>
(
r
.:
"publication_month"
<|>
r
.:
"Publication Month"
)
<*>
(
r
.:
"publication_day"
<|>
r
.:
"Publication Day"
)
<*>
(
r
.:
"abstract"
<|>
r
.:
"Abstract"
)
<*>
(
r
.:
"authors"
<|>
r
.:
"Authors"
)
parseNamedRecord
r
=
do
csv_title
<-
r
.:
"title"
<|>
r
.:
"Title"
csv_source
<-
r
.:
"source"
<|>
r
.:
"Source"
csv_publication_year
<-
r
.:
"publication_year"
<|>
r
.:
"Publication Year"
csv_publication_month
<-
r
.:
"publication_month"
<|>
r
.:
"Publication Month"
csv_publication_day
<-
r
.:
"publication_day"
<|>
r
.:
"Publication Day"
csv_abstract
<-
r
.:
"abstract"
<|>
r
.:
"Abstract"
csv_authors
<-
r
.:
"authors"
<|>
r
.:
"Authors"
pure
$
CsvDoc
{
..
}
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
namedRecord
[
"title"
.=
t
,
"source"
.=
s
,
"publication_year"
.=
py
,
"publication_month"
.=
pm
,
"publication_day"
.=
pd
,
"abstract"
.=
abs
t
,
"authors"
.=
aut
toNamedRecord
(
CsvDoc
{
..
}
)
=
namedRecord
[
"title"
.=
csv_title
,
"source"
.=
csv_source
,
"publication_year"
.=
csv_publication_year
,
"publication_month"
.=
csv_publication_month
,
"publication_day"
.=
csv_publication_day
,
"abstract"
.=
csv_abstrac
t
,
"authors"
.=
csv_authors
]
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
Just
$
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
Just
$
mI
$
_hd_publication_month
h
)
(
Just
$
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_authors
h
)
hyperdataDocument2csvDoc
h
=
CsvDoc
{
csv_title
=
m
$
_hd_title
h
,
csv_source
=
m
$
_hd_source
h
,
csv_publication_year
=
Just
$
IntOrDec
$
mI
$
_hd_publication_year
h
,
csv_publication_month
=
Just
$
mI
$
_hd_publication_month
h
,
csv_publication_day
=
Just
$
mI
$
_hd_publication_day
h
,
csv_abstract
=
m
$
_hd_abstract
h
,
csv_authors
=
m
$
_hd_authors
h
}
where
m
=
maybe
""
identity
...
...
@@ -300,110 +311,109 @@ data CsvHal = CsvHal
deriving
(
Show
)
instance
FromNamedRecord
CsvHal
where
parseNamedRecord
r
=
CsvHal
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"url"
<*>
r
.:
"isbn_s"
<*>
r
.:
"issue_s"
<*>
r
.:
"journalPublisher_s"
<*>
r
.:
"language_s"
<*>
r
.:
"doiId_s"
<*>
r
.:
"authId_i"
<*>
r
.:
"instStructId_i"
<*>
r
.:
"deptStructId_i"
<*>
r
.:
"labStructId_i"
<*>
r
.:
"rteamStructId_i"
<*>
r
.:
"docType_s"
parseNamedRecord
r
=
do
csvHal_title
<-
r
.:
"title"
csvHal_source
<-
r
.:
"source"
csvHal_publication_year
<-
r
.:
"publication_year"
csvHal_publication_month
<-
r
.:
"publication_month"
csvHal_publication_day
<-
r
.:
"publication_day"
csvHal_abstract
<-
r
.:
"abstract"
csvHal_authors
<-
r
.:
"authors"
csvHal_url
<-
r
.:
"url"
csvHal_isbn_s
<-
r
.:
"isbn_s"
csvHal_issue_s
<-
r
.:
"issue_s"
csvHal_journalPublisher_s
<-
r
.:
"journalPublisher_s"
csvHal_language_s
<-
r
.:
"language_s"
csvHal_doiId_s
<-
r
.:
"doiId_s"
csvHal_authId_i
<-
r
.:
"authId_i"
csvHal_instStructId_i
<-
r
.:
"instStructId_i"
csvHal_deptStructId_i
<-
r
.:
"deptStructId_i"
csvHal_labStructId_i
<-
r
.:
"labStructId_i"
csvHal_rteamStructId_i
<-
r
.:
"rteamStructId_i"
csvHal_docType_s
<-
r
.:
"docType_s"
pure
$
CsvHal
{
..
}
instance
ToNamedRecord
CsvHal
where
toNamedRecord
(
CsvHal
t
s
py
pm
pd
abst
aut
url
isbn
iss
j
lang
doi
auth
inst
dept
lab
team
doct
)
=
namedRecord
[
"title"
.=
t
,
"source"
.=
s
,
"publication_year"
.=
py
,
"publication_month"
.=
pm
,
"publication_day"
.=
pd
,
"abstract"
.=
abst
,
"authors"
.=
aut
,
"url"
.=
url
,
"isbn_s"
.=
isbn
,
"issue_s"
.=
iss
,
"journalPublisher_s"
.=
j
,
"language_s"
.=
lang
,
"doiId_s"
.=
doi
,
"authId_i"
.=
auth
,
"instStructId_i"
.=
inst
,
"deptStructId_i"
.=
dept
,
"labStructId_i"
.=
lab
--toNamedRecord (CsvHal t s py pm pd abst aut url isbn iss j lang doi auth inst dept lab team doct) =
toNamedRecord
(
CsvHal
{
..
})
=
namedRecord
[
"title"
.=
csvHal_title
,
"source"
.=
csvHal_source
,
"publication_year"
.=
csvHal_publication_year
,
"publication_month"
.=
csvHal_publication_month
,
"publication_day"
.=
csvHal_publication_day
,
"abstract"
.=
csvHal_abstract
,
"authors"
.=
csvHal_authors
,
"url"
.=
csvHal_url
,
"isbn_s"
.=
csvHal_isbn_s
,
"issue_s"
.=
csvHal_issue_s
,
"journalPublisher_s"
.=
csvHal_journalPublisher_s
,
"language_s"
.=
csvHal_language_s
,
"doiId_s"
.=
csvHal_doiId_s
,
"authId_i"
.=
csvHal_authId_i
,
"instStructId_i"
.=
csvHal_instStructId_i
,
"deptStructId_i"
.=
csvHal_deptStructId_i
,
"labStructId_i"
.=
csvHal_labStructId_i
,
"rteamStructId_i"
.=
team
,
"docType_s"
.=
doct
,
"rteamStructId_i"
.=
csvHal_rteamStructId_i
,
"docType_s"
.=
csvHal_docType_s
]
csvHal2doc
::
CsvHal
->
HyperdataDocument
csvHal2doc
(
CsvHal
title
source
pub_year
pub_month
pub_day
abstract
authors
url
_
_
_
_
doi
_
inst
_
_
_
_
)
=
HyperdataDocument
(
Just
"CsvHal"
)
(
Just
doi
)
(
Just
url
)
Nothing
Nothing
Nothing
(
Just
title
)
(
Just
authors
)
(
Just
inst
)
(
Just
source
)
(
Just
abstract
)
(
Just
$
pack
.
show
$
jour
pub_year
pub_month
pub_day
)
(
Just
$
fromIntegral
pub_year
)
(
Just
pub_month
)
(
Just
pub_day
)
Nothing
Nothing
Nothing
Nothing
csvHal2doc
(
CsvHal
{
..
})
=
HyperdataDocument
{
_hd_bdd
=
Just
"CsvHal"
,
_hd_doi
=
Just
csvHal_doiId_s
,
_hd_url
=
Just
csvHal_url
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
csvHal_title
,
_hd_authors
=
Just
csvHal_authors
,
_hd_institutes
=
Just
csvHal_instStructId_i
,
_hd_source
=
Just
csvHal_source
,
_hd_abstract
=
Just
csvHal_abstract
,
_hd_publication_date
=
Just
$
pack
.
show
$
jour
csvHal_publication_year
csvHal_publication_month
csvHal_publication_day
,
_hd_publication_year
=
Just
$
fromIntegral
csvHal_publication_year
,
_hd_publication_month
=
Just
csvHal_publication_month
,
_hd_publication_day
=
Just
csvHal_publication_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
mPubYear
mPubMonth
mPubDay
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
Nothing
Nothing
Nothing
(
Just
title
)
(
Just
authors
)
Nothing
(
Just
source
)
(
Just
abstract
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pubYear
)
pubMonth
pubDay
)
(
Just
pubYear
)
(
Just
pubMonth
)
(
Just
pubDay
)
Nothing
Nothing
Nothing
Nothing
csv2doc
(
CsvDoc
{
..
})
=
HyperdataDocument
{
_hd_bdd
=
Just
"CsvHal"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
csv_title
,
_hd_authors
=
Just
csv_authors
,
_hd_institutes
=
Nothing
,
_hd_source
=
Just
csv_source
,
_hd_abstract
=
Just
csv_abstract
,
_hd_publication_date
=
Just
$
pack
.
show
$
jour
(
fromIntegral
pubYear
)
pubMonth
pubDay
,
_hd_publication_year
=
Just
pubYear
,
_hd_publication_month
=
Just
pubMonth
,
_hd_publication_day
=
Just
pubDay
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
where
pubYear
=
fromMIntOrDec
defaultYear
mPubY
ear
pubMonth
=
fromMaybe
defaultMonth
mPubM
onth
pubDay
=
fromMaybe
defaultDay
mPubD
ay
pubYear
=
fromMIntOrDec
defaultYear
csv_publication_y
ear
pubMonth
=
fromMaybe
defaultMonth
csv_publication_m
onth
pubDay
=
fromMaybe
defaultDay
csv_publication_d
ay
------------------------------------------------------------------------
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
...
...
@@ -438,14 +448,16 @@ data Csv' = Csv'
instance
FromNamedRecord
Csv'
where
parseNamedRecord
r
=
Csv'
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
parseNamedRecord
r
=
do
csv'_title
<-
r
.:
"title"
csv'_source
<-
r
.:
"source"
csv'_publication_year
<-
r
.:
"publication_year"
csv'_publication_month
<-
r
.:
"publication_month"
csv'_publication_day
<-
r
.:
"publication_day"
csv'_abstract
<-
r
.:
"abstract"
csv'_authors
<-
r
.:
"authors"
csv'_weight
<-
r
.:
"weight"
pure
$
Csv'
{
..
}
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
Csv'
)
readWeightedCsv
fp
=
...
...
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
View file @
cf6fbff6
...
...
@@ -75,18 +75,26 @@ instance ToJSON GrandDebatReference
instance
ToHyperdataDocument
GrandDebatReference
where
toHyperdataDocument
(
GrandDebatReference
id'
_ref
title'
_createdAt'
publishedAt'
_updatedAt
_trashed
_trashedStatus
_authorId
authorType'
authorZipCode'
responses'
)
=
HyperdataDocument
(
Just
"GrandDebat"
)
id'
Nothing
Nothing
Nothing
Nothing
title'
authorType'
authorType'
authorZipCode'
(
toAbstract
<$>
responses'
)
publishedAt'
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
(
Just
$
Text
.
pack
$
show
FR
)
toHyperdataDocument
(
GrandDebatReference
{
id
,
title
,
publishedAt
,
authorType
,
authorZipCode
,
responses
})
=
HyperdataDocument
{
_hd_bdd
=
Just
"GrandDebat"
,
_hd_doi
=
id
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
title
,
_hd_authors
=
authorType
,
_hd_institutes
=
authorType
,
_hd_source
=
authorZipCode
,
_hd_abstract
=
toAbstract
<$>
responses
,
_hd_publication_date
=
publishedAt
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
}
where
toAbstract
=
(
Text
.
intercalate
" . "
)
.
((
filter
(
/=
""
))
.
(
map
toSentence
))
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
View file @
cf6fbff6
...
...
@@ -119,17 +119,24 @@ unbound _ _ = Nothing
bind2doc
::
Lang
->
[
BindingValue
]
->
HyperdataDocument
bind2doc
l
[
link
,
date
,
langDoc
,
authors
,
_source
,
publisher
,
title
,
abstract
]
=
HyperdataDocument
(
Just
"Isidore"
)
Nothing
(
unbound
l
link
)
Nothing
Nothing
Nothing
(
unbound
l
title
)
(
unbound
l
authors
)
Nothing
(
unbound
l
publisher
)
(
unbound
l
abstract
)
(
unbound
l
date
)
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
(
unbound
l
langDoc
)
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
,
_hd_doi
=
Nothing
,
_hd_url
=
unbound
l
link
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
unbound
l
title
,
_hd_authors
=
unbound
l
authors
,
_hd_institutes
=
Nothing
,
_hd_source
=
unbound
l
publisher
,
_hd_abstract
=
unbound
l
abstract
,
_hd_publication_date
=
unbound
l
date
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
unbound
l
langDoc
}
bind2doc
_
_
=
undefined
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
cf6fbff6
...
...
@@ -48,8 +48,14 @@ json2csv fin fout = do
writeFile
fout
(
headerCsvGargV3
,
fromList
$
map
patent2csvDoc
patents
)
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
CsvDoc
title
"Source"
(
Just
$
read
(
unpack
year
))
(
Just
1
)
(
Just
1
)
abstract
"Authors"
patent2csvDoc
(
Patent
{
..
})
=
CsvDoc
{
csv_title
=
_patent_title
,
csv_source
=
"Source"
,
csv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
csv_publication_month
=
Just
1
,
csv_publication_day
=
Just
1
,
csv_abstract
=
_patent_abstract
,
csv_authors
=
"Authors"
}
...
...
src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
View file @
cf6fbff6
...
...
@@ -70,3 +70,5 @@ onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
onField
k
f
m
=
m
<>
(
maybe
[]
f
(
lookup
k
m
)
)
src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
View file @
cf6fbff6
...
...
@@ -68,5 +68,3 @@ fixFields ns = map (first fixFields'') ns
|
champs
==
"UR"
=
"url"
|
champs
==
"N2"
=
abstract
|
otherwise
=
champs
src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
View file @
cf6fbff6
...
...
@@ -95,7 +95,9 @@ parsePage =
revision
<-
parseRevision
many_
$
ignoreAnyTreeContent
return
$
Page
Mediawiki
title
revision
return
$
Page
{
_markupFormat
=
Mediawiki
,
_title
=
title
,
_text
=
revision
}
parseMediawiki
::
MonadThrow
m
=>
ConduitT
Event
Page
m
(
Maybe
()
)
parseMediawiki
=
...
...
@@ -108,7 +110,7 @@ mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain
page
=
do
title
<-
mediaToPlain
$
_title
page
revision
<-
mediaToPlain
$
_text
page
return
$
Page
Plaintext
title
revision
return
$
Page
{
_markupFormat
=
Plaintext
,
_title
=
title
,
_text
=
revision
}
where
mediaToPlain
media
=
case
media
of
(
Nothing
)
->
return
Nothing
...
...
src/Gargantext/Core/Text/List.hs
View file @
cf6fbff6
...
...
@@ -86,17 +86,17 @@ buildNgramsLists user uCid mCid mfslw gp = do
data
MapListSize
=
MapListSize
{
unMapListSize
::
!
Int
}
buildNgramsOthersList
::
(
HasNodeError
err
,
CmdM
env
err
m
,
HasNodeStory
env
err
m
,
HasTreeError
err
)
=>
User
->
UserCorpusId
->
Maybe
FlowSocialListWith
->
GroupParams
->
(
NgramsType
,
MapListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
::
(
HasNodeError
err
,
CmdM
env
err
m
,
HasNodeStory
env
err
m
,
HasTreeError
err
)
=>
User
->
UserCorpusId
->
Maybe
FlowSocialListWith
->
GroupParams
->
(
NgramsType
,
MapListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
user
uCid
mfslw
_groupParams
(
nt
,
MapListSize
mapListSize
)
=
do
allTerms
::
HashMap
NgramsTerm
(
Set
NodeId
)
<-
getNodesByNgramsUser
uCid
nt
...
...
@@ -106,7 +106,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize)
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
)
)
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists
allTerms
...
...
@@ -148,13 +148,13 @@ buildNgramsTermsList :: ( HasNodeError err
,
HasNodeStory
env
err
m
,
HasTreeError
err
)
=>
User
->
UserCorpusId
->
MasterCorpusId
->
Maybe
FlowSocialListWith
->
GroupParams
->
(
NgramsType
,
MapListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
=>
User
->
UserCorpusId
->
MasterCorpusId
->
Maybe
FlowSocialListWith
->
GroupParams
->
(
NgramsType
,
MapListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
user
uCid
mCid
mfslw
groupParams
(
nt
,
_mapListSize
)
=
do
-- Filter 0 With Double
...
...
@@ -170,7 +170,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
)
)
printDebug
"[buldNgramsTermsList: Flow Social List / end]"
nt
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
...
...
src/Gargantext/Core/Text/List/Group/WithStem.hs
View file @
cf6fbff6
...
...
@@ -72,7 +72,7 @@ groupWith :: GroupParams
->
NgramsTerm
->
NgramsTerm
groupWith
GroupIdentity
t
=
identity
t
groupWith
(
GroupParams
l
_m
_n
_
)
t
=
groupWith
(
GroupParams
{
unGroupParams_lang
=
l
}
)
t
=
NgramsTerm
$
Text
.
intercalate
" "
$
map
(
stem
l
)
...
...
@@ -86,7 +86,7 @@ groupWith (GroupParams l _m _n _) t =
$
unNgramsTerm
t
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith
(
GroupWithPosTag
_
_
m
)
t
=
groupWith
(
GroupWithPosTag
{
_gwl_map
=
m
}
)
t
=
case
HashMap
.
lookup
(
unNgramsTerm
t
)
m
of
Nothing
->
clean
t
Just
t'
->
clean
$
NgramsTerm
t'
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
cf6fbff6
...
...
@@ -82,11 +82,11 @@ makeLenses ''TermType
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
Terms
]]
extractTerms
(
Unsupervised
l
n
s
m
)
xs
=
mapM
(
terms
(
Unsupervised
l
n
s
(
Just
m'
)
))
xs
extractTerms
(
Unsupervised
{
..
})
xs
=
mapM
(
terms
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}
))
xs
where
m'
=
case
m
of
m'
=
case
_tt_model
of
Just
m''
->
m''
Nothing
->
newTries
n
(
Text
.
intercalate
" "
xs
)
Nothing
->
newTries
_tt_windowSize
(
Text
.
intercalate
" "
xs
)
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
...
...
@@ -96,15 +96,16 @@ withLang :: (Foldable t, Functor t, HasText h)
=>
TermType
Lang
->
t
h
->
TermType
Lang
withLang
(
Unsupervised
l
n
s
m
)
ns
=
Unsupervised
l
n
s
m'
withLang
(
Unsupervised
{
..
})
ns
=
Unsupervised
{
_tt_model
=
m'
,
..
}
where
m'
=
case
m
of
m'
=
case
_tt_model
of
Nothing
->
-- trace ("buildTries here" :: String)
Just
$
buildTries
n
$
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
$
map
hasText
ns
Just
$
buildTries
_tt_ngramsSize
$
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
$
map
hasText
ns
just_m
->
just_m
withLang
l
_
=
l
...
...
@@ -171,9 +172,9 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
Unsupervised
lang
n
s
m
)
txt
=
termsUnsupervised
(
Unsupervised
lang
n
s
(
Just
m'
)
)
txt
terms
(
Unsupervised
{
..
})
txt
=
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}
)
txt
where
m'
=
maybe
(
newTries
n
txt
)
identity
m
m'
=
maybe
(
newTries
_tt_ngramsSize
txt
)
identity
_tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment