Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
fbfb5d60
Commit
fbfb5d60
authored
Jan 17, 2018
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Date parsing for ISI (Web of Science)
parent
c59d8b79
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
0 deletions
+36
-0
ISI.py
gargantext/util/parsers/ISI.py
+33
-0
_Parser.py
gargantext/util/parsers/_Parser.py
+3
-0
No files found.
gargantext/util/parsers/ISI.py
View file @
fbfb5d60
import
re
from
.RIS
import
RISParser
from
.RIS
import
RISParser
...
@@ -17,3 +19,34 @@ class ISIParser(RISParser):
...
@@ -17,3 +19,34 @@ class ISIParser(RISParser):
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
}
}
# Patterns used to clean up the ISI "PD" (publication date) field.

# A standalone four-digit number (a year).
_year = re.compile(r'\b\d{4}\b')

# A three-letter season abbreviation, matched in any letter case.
_season = re.compile(r'\b(SPR|SUM|FAL|WIN)\b', flags=re.IGNORECASE)

# Two three-letter words joined by a hyphen, e.g. "JAN-FEB"; group 1 is the
# first word, group 2 the second. Matched in any letter case.
_month_interval = re.compile(
    r'\b([A-Z]{3})-([A-Z]{3})\b',
    flags=re.IGNORECASE,
)

# Two numbers of one or two digits joined by a hyphen, e.g. "12-15"; group 1
# is the first number, group 2 the second.
_day_interval = re.compile(r'\b(\d{1,2})-(\d{1,2})\b')
def _preprocess_PD(self, PD, PY):
    """Normalise a raw ISI "PD" (publication date) value before date parsing.

    The steps are applied in this order:

    1. If PY contains a four-digit year and PD does not, PD is prefixed
       with PY and a space.
    2. Season abbreviations (SPR, SUM, FAL, WIN) are removed, together with
       any hyphen left alone by a season interval such as "SPR-SUM".
    3. A month interval ("JAN-FEB") is reduced to its first month.
    4. A day interval ("12-15") is reduced to its first day.

    Args:
        PD: raw publication-date string.
        PY: publication-year string; may be empty or None, in which case no
            year is added.

    Returns:
        The cleaned date string, stripped and with whitespace runs collapsed
        to single spaces.
    """
    # Add a year to date if applicable
    if PY and self._year.search(PY) and not self._year.search(PD):
        PD = PY + " " + PD

    # Drop season if any
    PD = self._season.sub('', PD)

    # Removing a season interval ("FAL-WIN") leaves a bare "-" behind, and
    # removing a season from the middle of the value leaves a double space.
    # Discard hyphen-only tokens and rejoin on single spaces so that no
    # debris reaches the date parser.
    PD = " ".join(token for token in PD.split() if token.strip("-"))

    # If a month interval is present, keep only the first month
    PD = self._month_interval.sub(r'\1', PD)

    # If a day interval is present, keep only the first day
    PD = self._day_interval.sub(r'\1', PD)

    return PD
def parse(self, file):
    """Yield the parent parser's entries with the publication date cleaned.

    Each entry produced by the parent class's ``parse(file)`` is passed
    through unchanged, except that when it contains the publication-date
    key its value is replaced by the result of ``_preprocess_PD``.

    Args:
        file: forwarded as-is to the parent class's ``parse``.

    Yields:
        The entries produced by the parent parser, in the same order.
    """
    # Names under which the "PD" and "PY" tags are stored in an entry.
    PD = self._parameters["PD"]["key"]
    PY = self._parameters["PY"]["key"]

    for entry in super().parse(file):
        if PD in entry:
            # An entry may have a PD tag but no PY tag; _preprocess_PD
            # accepts a missing year, so look it up without raising KeyError.
            # NOTE(review): assumes entries are dict-like (support .get).
            entry[PD] = self._preprocess_PD(entry[PD], entry.get(PY))
        yield entry
gargantext/util/parsers/_Parser.py
View file @
fbfb5d60
...
@@ -86,6 +86,9 @@ class Parser:
...
@@ -86,6 +86,9 @@ class Parser:
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
hyperdata
[
'publication_date'
]
=
datetime
.
now
()
hyperdata
[
'publication_date'
]
=
datetime
.
now
()
# XXX Handling prefixes is most likely useless: there seems to be only
# one prefix, which is "publication" (as in "publication_date").
# ...then parse all the "date" fields, to parse it into separate elements
# ...then parse all the "date" fields, to parse it into separate elements
prefixes
=
[
key
[:
-
5
]
for
key
in
hyperdata
.
keys
()
if
key
[
-
5
:]
==
"_date"
]
prefixes
=
[
key
[:
-
5
]
for
key
in
hyperdata
.
keys
()
if
key
[
-
5
:]
==
"_date"
]
for
prefix
in
prefixes
:
for
prefix
in
prefixes
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment