Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
3ff6aa00
Commit
3ff6aa00
authored
Jun 03, 2016
by
Romain Loth
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'unstable' into romain-goodies
Conflicts: templates/pages/projects/project.html
parents
bd764941
a9f54519
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
92 additions
and
20 deletions
+92
-20
constants.py
gargantext/constants.py
+7
-1
Ris.py
gargantext/util/parsers/Ris.py
+2
-1
Ris_repec.py
gargantext/util/parsers/Ris_repec.py
+67
-0
_Parser.py
gargantext/util/parsers/_Parser.py
+9
-9
__init__.py
gargantext/util/parsers/__init__.py
+7
-6
project.html
templates/pages/projects/project.html
+0
-3
No files found.
gargantext/constants.py
View file @
3ff6aa00
...
...
@@ -128,7 +128,7 @@ LANGUAGES = {
from
gargantext.util.parsers
import
\
EuropressParser
,
RISParser
,
PubmedParser
,
ISIParser
,
CSVParser
,
ISTexParser
,
CernParser
EuropressParser
,
RISParser
,
PubmedParser
,
ISIParser
,
CSVParser
,
ISTexParser
,
CernParser
,
RepecParser
def
resourcetype
(
name
):
'''
...
...
@@ -208,6 +208,12 @@ RESOURCETYPES = [
#~ "base_url": "http://api.scoap3.org/search?",
},
# type 11
{
'name'
:
'REPEC (RIS format)'
,
'parser'
:
RepecParser
,
'default_language'
:
'en'
,
},
]
# linguistic extraction parameters ---------------------------------------------
...
...
gargantext/util/parsers/Ris.py
View file @
3ff6aa00
...
...
@@ -18,11 +18,12 @@ class RISParser(Parser):
b
"ER"
:
{
"type"
:
"delimiter"
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"ST"
:
{
"type"
:
"hyperdata"
,
"key"
:
"subtitle"
,
"separator"
:
" "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
"
,
"
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
"
\n
"
},
b
"T2"
:
{
"type"
:
"hyperdata"
,
"key"
:
"journal"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
b
"N1"
:
{
"type"
:
"hyperdata"
,
"key"
:
"references"
,
"separator"
:
", "
},
b
"LA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"language_iso2"
},
b
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
b
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
...
...
gargantext/util/parsers/Ris_repec.py
0 → 100644
View file @
3ff6aa00
from
._Parser
import
Parser
from
gargantext.util.languages
import
languages
#from admin.utils import PrintException
class RepecParser(Parser):
    """Parser for REPEC bibliographic exports written in RIS format.

    The input is read line by line as ``bytes``. Each line starts with a
    two-byte tag; the value starts at offset ``_begin``. Lines whose first
    two bytes are spaces continue the previous tag, and consecutive lines
    repeating the same tag are accumulated and joined with the tag's
    ``separator``. The ``ER`` tag closes a record.
    """

    # Offset of the value on a line: two-byte tag followed by four more
    # bytes before the value (e.g. ``T1  - Some title``).
    _begin = 6

    # Tag -> handling rule.  "hyperdata" rules store the joined values under
    # ``key``; the "delimiter" rule ends the current record.
    _parameters = {
        b"ER": {"type": "delimiter"},
        b"T1": {"type": "hyperdata", "key": "title", "separator": " "},
        b"ST": {"type": "hyperdata", "key": "subtitle", "separator": " "},
        b"A1": {"type": "hyperdata", "key": "authors", "separator": "\n"},
        b"JO": {"type": "hyperdata", "key": "journal"},
        b"UR": {"type": "hyperdata", "key": "doi"},
        b"Y1": {"type": "hyperdata", "key": "publication_year"},
        b"PD": {"type": "hyperdata", "key": "publication_month"},
        b"N1": {"type": "hyperdata", "key": "references", "separator": ", "},
        b"LA": {"type": "hyperdata", "key": "language_iso2"},
        b"N2": {"type": "hyperdata", "key": "abstract", "separator": " "},
        b"WC": {"type": "hyperdata", "key": "fields"},
    }

    # A record carrying any of these keys already has a language.
    _language_keys = ('language_fullname', 'language_iso3', 'language_iso2')

    def parse(self, file):
        """Yield one hyperdata dict per record found in ``file``.

        Args:
            file: iterable of ``bytes`` lines (e.g. a file opened in
                binary mode).

        Yields:
            dict mapping hyperdata keys (see ``_parameters``) to ``str``
            values.  ``publication_year`` is truncated to its first four
            characters.  Records without any language key get
            ``language_iso2 = 'en'``.
        """
        hyperdata = {}
        last_key = None
        last_values = []

        for line in file:
            # too short to hold a tag (blank lines, bare line endings)
            if len(line) <= 2:
                continue

            parameter_key = line[:2]
            # two spaces mark a continuation of the previous tag
            if parameter_key != b'  ' and parameter_key != last_key:
                # a new tag begins: store what was gathered for the old one
                if self._store_field(last_key, last_values, hyperdata):
                    self._ensure_language(hyperdata)
                    yield hyperdata
                    hyperdata = {}
                last_key = parameter_key
                last_values = []

            try:
                # strip the line ending explicitly instead of dropping the
                # last byte: handles "\r\n" and a final line without "\n"
                value = line[self._begin:].rstrip(b"\r\n").decode()
            except UnicodeDecodeError as error:
                # best effort: report and skip the undecodable line
                print(error)
            else:
                last_values.append(value)

        # The loop only stores a field when the *next* tag shows up, so the
        # last field (and the last record) are still pending here.
        self._store_field(last_key, last_values, hyperdata)
        if hyperdata:
            self._ensure_language(hyperdata)
            yield hyperdata

    def _store_field(self, key, values, hyperdata):
        """Store the values gathered for ``key``; return True on a delimiter."""
        parameter = self._parameters.get(key)
        if parameter is None:
            # unknown tag, or nothing read yet (key is None)
            return False
        if parameter["type"] == "delimiter":
            return True
        if parameter["type"] == "hyperdata":
            text = parameter.get("separator", "").join(values)
            if parameter["key"] == "publication_year":
                text = text[:4]
            hyperdata[parameter["key"]] = text
        return False

    def _ensure_language(self, hyperdata):
        """Default the record language to English when none is present."""
        if not any(key in hyperdata for key in self._language_keys):
            hyperdata['language_iso2'] = 'en'
gargantext/util/parsers/_Parser.py
View file @
3ff6aa00
...
...
@@ -60,26 +60,26 @@ class Parser:
print
(
error
,
'Date not parsed for:'
,
date_string
)
hyperdata
[
'publication_date'
]
=
datetime
.
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
elif
hyperdata
.
get
(
'publication_year'
,
None
)
is
not
None
:
prefixes
=
[
key
[:
-
5
]
for
key
in
hyperdata
.
keys
()
if
key
[
-
5
:]
==
"_year"
]
for
prefix
in
prefixes
:
date_string
=
hyperdata
[
prefix
+
"_year"
]
key
=
prefix
+
"_month"
if
key
in
hyperdata
:
date_string
+=
" "
+
hyperdata
[
key
]
date_string
+=
" "
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_day"
if
key
in
hyperdata
:
date_string
+=
" "
+
hyperdata
[
key
]
date_string
+=
" "
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_hour"
if
key
in
hyperdata
:
date_string
+=
" "
+
hyperdata
[
key
]
date_string
+=
" "
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_minute"
if
key
in
hyperdata
:
date_string
+=
":"
+
hyperdata
[
key
]
date_string
+=
":"
+
hyperdata
.
get
(
key
,
"01"
)
key
=
prefix
+
"_second"
if
key
in
hyperdata
:
date_string
+=
":"
+
hyperdata
[
key
]
date_string
+=
":"
+
hyperdata
.
get
(
key
,
"01"
)
try
:
hyperdata
[
prefix
+
"_date"
]
=
dateutil
.
parser
.
parse
(
date_string
)
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
except
Exception
as
error
:
...
...
@@ -90,13 +90,13 @@ class Parser:
except
Exception
as
error
:
try
:
print
(
error
)
print
(
"error line 93"
,
error
)
# FIXME Date format: 1994 SPR
# By default, we take the year only
hyperdata
[
prefix
+
"_date"
]
=
date_parser
.
parse
(
str
(
date_string
)[:
4
])
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
except
Exception
as
error
:
print
(
error
)
print
(
"error line 99"
,
error
)
else
:
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
hyperdata
[
'publication_date'
]
=
datetime
.
datetime
.
now
()
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
...
...
@@ -113,7 +113,7 @@ class Parser:
hyperdata
[
prefix
+
"_hour"
]
=
date
.
strftime
(
"
%
H"
)
hyperdata
[
prefix
+
"_minute"
]
=
date
.
strftime
(
"
%
M"
)
hyperdata
[
prefix
+
"_second"
]
=
date
.
strftime
(
"
%
S"
)
print
(
hyperdata
[
'publication_date'
])
print
(
"line 116"
,
hyperdata
[
'publication_date'
])
# finally, return the transformed result!
return
hyperdata
...
...
gargantext/util/parsers/__init__.py
View file @
3ff6aa00
from
.Ris
import
RISParser
from
.Isi
import
ISIParser
from
.Ris
import
RISParser
from
.Ris_repec
import
RepecParser
from
.Isi
import
ISIParser
# from .Jstor import JstorParser
# from .Zotero import ZoteroParser
from
.Pubmed
import
PubmedParser
from
.Pubmed
import
PubmedParser
# # 2015-12-08: 2-in-1 parser
from
.Europress
import
EuropressParser
from
.ISTex
import
ISTexParser
from
.CSV
import
CSVParser
from
.Cern
import
CernParser
from
.ISTex
import
ISTexParser
from
.CSV
import
CSVParser
from
.Cern
import
CernParser
templates/pages/projects/project.html
View file @
3ff6aa00
...
...
@@ -114,7 +114,6 @@
<span
class=
"glyphicon glyphicon-repeat"
aria-hidden=
"true"
title=
'Recalculate ngram scores and similarities'
></span>
</button>
<button
type=
"button"
class=
"btn btn-default"
data-container=
"body"
data-toggle=
"popover"
data-placement=
"bottom"
data-content=
"
<ul>
...
...
@@ -130,11 +129,9 @@
<span
class=
"glyphicon glyphicon-trash"
aria-hidden=
"true"
title=
'Delete this corpus'
></span>
</button>
{% endif %}
{% endifequal %}
{% endfor %}
</div>
<div
class=
"col-md-3 content"
>
{% for state in corpus.hyperdata.statuses %}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment