Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
a9f54519
Commit
a9f54519
authored
Jun 01, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Ajout de REPEC parser, format RIS.
parent
7bc5d3bd
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
92 additions
and
17 deletions
+92
-17
constants.py
gargantext/constants.py
+7
-1
Ris.py
gargantext/util/parsers/Ris.py
+2
-1
Ris_repec.py
gargantext/util/parsers/Ris_repec.py
+67
-0
_Parser.py
gargantext/util/parsers/_Parser.py
+9
-9
__init__.py
gargantext/util/parsers/__init__.py
+7
-6
No files found.
gargantext/constants.py
View file @
a9f54519
...
@@ -128,7 +128,7 @@ LANGUAGES = {
...
@@ -128,7 +128,7 @@ LANGUAGES = {
from
gargantext.util.parsers
import
\
from
gargantext.util.parsers
import
\
EuropressParser
,
RISParser
,
PubmedParser
,
ISIParser
,
CSVParser
,
ISTexParser
,
CernParser
EuropressParser
,
RISParser
,
PubmedParser
,
ISIParser
,
CSVParser
,
ISTexParser
,
CernParser
,
RepecParser
def
resourcetype
(
name
):
def
resourcetype
(
name
):
'''
'''
...
@@ -208,6 +208,12 @@ RESOURCETYPES = [
...
@@ -208,6 +208,12 @@ RESOURCETYPES = [
#~ "base_url": "http://api.scoap3.org/search?",
#~ "base_url": "http://api.scoap3.org/search?",
},
},
# type 11
{
'name'
:
'REPEC (RIS format)'
,
'parser'
:
RepecParser
,
'default_language'
:
'en'
,
},
]
]
# linguistic extraction parameters ---------------------------------------------
# linguistic extraction parameters ---------------------------------------------
...
...
gargantext/util/parsers/Ris.py
View file @
a9f54519
...
@@ -18,11 +18,12 @@ class RISParser(Parser):
...
@@ -18,11 +18,12 @@ class RISParser(Parser):
b
"ER"
:
{
"type"
:
"delimiter"
},
b
"ER"
:
{
"type"
:
"delimiter"
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"ST"
:
{
"type"
:
"hyperdata"
,
"key"
:
"subtitle"
,
"separator"
:
" "
},
b
"ST"
:
{
"type"
:
"hyperdata"
,
"key"
:
"subtitle"
,
"separator"
:
" "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
"
,
"
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
"
\n
"
},
b
"T2"
:
{
"type"
:
"hyperdata"
,
"key"
:
"journal"
},
b
"T2"
:
{
"type"
:
"hyperdata"
,
"key"
:
"journal"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
b
"N1"
:
{
"type"
:
"hyperdata"
,
"key"
:
"references"
,
"separator"
:
", "
},
b
"LA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"language_iso2"
},
b
"LA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"language_iso2"
},
b
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
b
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
b
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
b
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
...
...
gargantext/util/parsers/Ris_repec.py
0 → 100644
View file @
a9f54519
from
._Parser
import
Parser
from
gargantext.util.languages
import
languages
#from admin.utils import PrintException
class RepecParser(Parser):
    """Parser for REPEC bibliographic exports written in RIS format.

    Each input line is expected to start with a two-byte tag; the value
    starts at byte offset ``_begin``.  Lines starting with two spaces are
    continuation lines of the current field.  The ``ER`` tag closes a
    record.
    """

    # Byte offset at which the value starts on a line
    # (presumably skips the ``XX  - `` prefix of RIS lines -- TODO confirm).
    _begin = 6

    # Maps a two-byte tag to the way its values are handled:
    #   "delimiter" -> end of the current record
    #   "hyperdata" -> stored under "key", values joined with "separator"
    #                  (empty string when no separator is given)
    _parameters = {
        b"ER": {"type": "delimiter"},
        b"T1": {"type": "hyperdata", "key": "title", "separator": " "},
        b"ST": {"type": "hyperdata", "key": "subtitle", "separator": " "},
        b"A1": {"type": "hyperdata", "key": "authors", "separator": "\n"},
        b"JO": {"type": "hyperdata", "key": "journal"},
        b"UR": {"type": "hyperdata", "key": "doi"},
        b"Y1": {"type": "hyperdata", "key": "publication_year"},
        b"PD": {"type": "hyperdata", "key": "publication_month"},
        b"N1": {"type": "hyperdata", "key": "references", "separator": ", "},
        b"LA": {"type": "hyperdata", "key": "language_iso2"},
        b"N2": {"type": "hyperdata", "key": "abstract", "separator": " "},
        b"WC": {"type": "hyperdata", "key": "fields"},
    }

    def parse(self, file):
        """Yield one hyperdata dict per record found in *file*.

        Args:
            file: iterable of ``bytes`` lines (e.g. a file opened in
                binary mode).

        Yields:
            dict mapping the hyperdata keys declared in ``_parameters`` to
            the joined string values of the record.  ``language_iso2``
            defaults to ``'en'`` when the record carries no language key.
        """
        language_keys = ('language_fullname', 'language_iso3', 'language_iso2')

        def store(parameter, values, record):
            # Join the accumulated values of one field into the record.
            joined = parameter.get("separator", "").join(values)
            if parameter["key"] == "publication_year":
                # keep only the first four characters as the year
                joined = joined[:4]
            record[parameter["key"]] = joined

        def ensure_language(record):
            # Fall back to English when no language information is present.
            if not any(key in record for key in language_keys):
                record['language_iso2'] = 'en'

        hyperdata = {}
        last_key = None
        last_values = []

        # browse every line of the file
        for line in file:
            if len(line) <= 2:
                continue
            # extract the parameter key
            parameter_key = line[:2]
            # A key made of two spaces marks a continuation line.  The
            # literal must be two bytes long to ever equal ``line[:2]``.
            if parameter_key != b"  " and parameter_key != last_key:
                # a new field starts: commit the previous one
                parameter = self._parameters.get(last_key)
                if parameter is not None:
                    if parameter["type"] == "hyperdata":
                        store(parameter, last_values, hyperdata)
                    elif parameter["type"] == "delimiter":
                        ensure_language(hyperdata)
                        yield hyperdata
                        hyperdata = {}
                last_key = parameter_key
                last_values = []
            try:
                # Strip only the line terminator (LF or CRLF) instead of
                # blindly dropping the last byte, so an unterminated final
                # line keeps its last character.
                value = line[self._begin:].rstrip(b"\r\n").decode()
            except (UnicodeDecodeError, AttributeError) as error:
                # best effort: report the undecodable line and go on
                print(error)
            else:
                last_values.append(value)

        # Commit the field still pending at end of input; without this the
        # last field of a file not terminated by ``ER`` would be lost.
        parameter = self._parameters.get(last_key)
        if parameter is not None and parameter["type"] == "hyperdata":
            store(parameter, last_values, hyperdata)

        # if a hyperdata object is left in memory, yield it as well
        if hyperdata:
            ensure_language(hyperdata)
            yield hyperdata
gargantext/util/parsers/_Parser.py
View file @
a9f54519
...
@@ -67,19 +67,19 @@ class Parser:
...
@@ -67,19 +67,19 @@ class Parser:
date_string
=
hyperdata
[
prefix
+
"_year"
]
date_string
=
hyperdata
[
prefix
+
"_year"
]
key
=
prefix
+
"_month"
key
=
prefix
+
"_month"
if
key
in
hyperdata
:
if
key
in
hyperdata
:
date_string
+=
" "
+
hyperdata
[
key
]
date_string
+=
" "
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_day"
key
=
prefix
+
"_day"
if
key
in
hyperdata
:
if
key
in
hyperdata
:
date_string
+=
" "
+
hyperdata
[
key
]
date_string
+=
" "
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_hour"
key
=
prefix
+
"_hour"
if
key
in
hyperdata
:
if
key
in
hyperdata
:
date_string
+=
" "
+
hyperdata
[
key
]
date_string
+=
" "
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_minute"
key
=
prefix
+
"_minute"
if
key
in
hyperdata
:
if
key
in
hyperdata
:
date_string
+=
":"
+
hyperdata
[
key
]
date_string
+=
":"
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_second"
key
=
prefix
+
"_second"
if
key
in
hyperdata
:
if
key
in
hyperdata
:
date_string
+=
":"
+
hyperdata
[
key
]
date_string
+=
":"
+
hyperdata
.
get
(
key
,
"01"
)
try
:
try
:
hyperdata
[
prefix
+
"_date"
]
=
dateutil
.
parser
.
parse
(
date_string
)
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
hyperdata
[
prefix
+
"_date"
]
=
dateutil
.
parser
.
parse
(
date_string
)
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
except
Exception
as
error
:
except
Exception
as
error
:
...
@@ -90,13 +90,13 @@ class Parser:
...
@@ -90,13 +90,13 @@ class Parser:
except
Exception
as
error
:
except
Exception
as
error
:
try
:
try
:
print
(
error
)
print
(
"error line 93"
,
error
)
# FIXME Date format: 1994 SPR
# FIXME Date format: 1994 SPR
# By default, we take the year only
# By default, we take the year only
hyperdata
[
prefix
+
"_date"
]
=
date_parser
.
parse
(
str
(
date_string
)[:
4
])
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
hyperdata
[
prefix
+
"_date"
]
=
date_parser
.
parse
(
str
(
date_string
)[:
4
])
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
except
Exception
as
error
:
except
Exception
as
error
:
print
(
error
)
print
(
"error line 99"
,
error
)
else
:
else
:
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
hyperdata
[
'publication_date'
]
=
datetime
.
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
hyperdata
[
'publication_date'
]
=
datetime
.
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
...
@@ -113,7 +113,7 @@ class Parser:
...
@@ -113,7 +113,7 @@ class Parser:
hyperdata
[
prefix
+
"_hour"
]
=
date
.
strftime
(
"
%
H"
)
hyperdata
[
prefix
+
"_hour"
]
=
date
.
strftime
(
"
%
H"
)
hyperdata
[
prefix
+
"_minute"
]
=
date
.
strftime
(
"
%
M"
)
hyperdata
[
prefix
+
"_minute"
]
=
date
.
strftime
(
"
%
M"
)
hyperdata
[
prefix
+
"_second"
]
=
date
.
strftime
(
"
%
S"
)
hyperdata
[
prefix
+
"_second"
]
=
date
.
strftime
(
"
%
S"
)
print
(
hyperdata
[
'publication_date'
])
print
(
"line 116"
,
hyperdata
[
'publication_date'
])
# finally, return the transformed result!
# finally, return the transformed result!
return
hyperdata
return
hyperdata
...
...
gargantext/util/parsers/__init__.py
View file @
a9f54519
from
.Ris
import
RISParser
from
.Ris
import
RISParser
from
.Ris_repec
import
RepecParser
from
.Isi
import
ISIParser
from
.Isi
import
ISIParser
# from .Jstor import JstorParser
# from .Jstor import JstorParser
# from .Zotero import ZoteroParser
# from .Zotero import ZoteroParser
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment