Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
2bf705d9
Commit
2bf705d9
authored
Jan 18, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/simon-dev' into dev
parents
b25deb7b
fbfb5d60
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
41 additions
and
3 deletions
+41
-3
env.py
alembic/env.py
+2
-1
db.py
gargantext/util/db.py
+1
-1
CSV.py
gargantext/util/parsers/CSV.py
+2
-1
ISI.py
gargantext/util/parsers/ISI.py
+33
-0
_Parser.py
gargantext/util/parsers/_Parser.py
+3
-0
No files found.
alembic/env.py
View file @
2bf705d9
...
@@ -18,7 +18,8 @@ from gargantext import settings, models
...
@@ -18,7 +18,8 @@ from gargantext import settings, models
# this is the Alembic Config object, which provides
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
# access to the values within the .ini file in use.
config
=
context
.
config
config
=
context
.
config
config
.
set_main_option
(
"sqlalchemy.url"
,
settings
.
DATABASES
[
'default'
][
'URL'
])
config
.
set_main_option
(
"sqlalchemy.url"
,
settings
.
DATABASES
[
'default'
][
'SECRET_URL'
])
# Interpret the config file for Python logging.
# Interpret the config file for Python logging.
# This line sets up loggers basically.
# This line sets up loggers basically.
...
...
gargantext/util/db.py
View file @
2bf705d9
...
@@ -10,7 +10,7 @@ from sqlalchemy import delete
...
@@ -10,7 +10,7 @@ from sqlalchemy import delete
def
get_engine
():
def
get_engine
():
from
sqlalchemy
import
create_engine
from
sqlalchemy
import
create_engine
return
create_engine
(
settings
.
DATABASES
[
'default'
][
'URL'
]
return
create_engine
(
settings
.
DATABASES
[
'default'
][
'
SECRET_
URL'
]
,
use_native_hstore
=
True
,
use_native_hstore
=
True
,
json_serializer
=
json_dumps
,
json_serializer
=
json_dumps
,
pool_size
=
20
,
max_overflow
=
0
,
pool_size
=
20
,
max_overflow
=
0
...
...
gargantext/util/parsers/CSV.py
View file @
2bf705d9
...
@@ -16,7 +16,8 @@ class CSVParser(Parser):
...
@@ -16,7 +16,8 @@ class CSVParser(Parser):
def
parse
(
self
,
fp
=
None
):
def
parse
(
self
,
fp
=
None
):
fp
=
fp
or
self
.
_file
fp
=
fp
or
self
.
_file
df
=
pandas
.
read_csv
(
fp
,
dtype
=
object
,
skip_blank_lines
=
True
,
sep
=
None
,
df
=
pandas
.
read_csv
(
fp
,
dtype
=
object
,
engine
=
'python'
,
skip_blank_lines
=
True
,
sep
=
None
,
na_values
=
[],
keep_default_na
=
False
)
na_values
=
[],
keep_default_na
=
False
)
# Return a generator of dictionaries with column labels as keys,
# Return a generator of dictionaries with column labels as keys,
...
...
gargantext/util/parsers/ISI.py
View file @
2bf705d9
import
re
from
.RIS
import
RISParser
from
.RIS
import
RISParser
...
@@ -17,3 +19,34 @@ class ISIParser(RISParser):
...
@@ -17,3 +19,34 @@ class ISIParser(RISParser):
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
"AB"
:
{
"type"
:
"hyperdata"
,
"key"
:
"abstract"
,
"separator"
:
" "
},
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
"WC"
:
{
"type"
:
"hyperdata"
,
"key"
:
"fields"
},
}
}
_year
=
re
.
compile
(
r'\b\d{4}\b'
)
_season
=
re
.
compile
(
r'\b(SPR|SUM|FAL|WIN)\b'
,
re
.
I
)
_month_interval
=
re
.
compile
(
r'\b([A-Z]{3})-([A-Z]{3})\b'
,
re
.
I
)
_day_interval
=
re
.
compile
(
r'\b(\d{1,2})-(\d{1,2})\b'
)
def
_preprocess_PD
(
self
,
PD
,
PY
):
# Add a year to date if applicable
if
PY
and
self
.
_year
.
search
(
PY
)
and
not
self
.
_year
.
search
(
PD
):
PD
=
PY
+
" "
+
PD
# Drop season if any
PD
=
self
.
_season
.
sub
(
''
,
PD
)
.
strip
()
# If a month interval is present, keep only the first month
PD
=
self
.
_month_interval
.
sub
(
r'\1'
,
PD
)
# If a day interval is present, keep only the first day
PD
=
self
.
_day_interval
.
sub
(
r'\1'
,
PD
)
return
PD
def
parse
(
self
,
file
):
PD
=
self
.
_parameters
[
"PD"
][
"key"
]
PY
=
self
.
_parameters
[
"PY"
][
"key"
]
for
entry
in
super
()
.
parse
(
file
):
if
PD
in
entry
:
entry
[
PD
]
=
self
.
_preprocess_PD
(
entry
[
PD
],
entry
[
PY
])
yield
entry
gargantext/util/parsers/_Parser.py
View file @
2bf705d9
...
@@ -86,6 +86,9 @@ class Parser:
...
@@ -86,6 +86,9 @@ class Parser:
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
print
(
"WARNING: Date unknown at _Parser level, using now()"
)
hyperdata
[
'publication_date'
]
=
datetime
.
now
()
hyperdata
[
'publication_date'
]
=
datetime
.
now
()
# XXX Handling prefixes is most likely useless: there seem to be only
# one prefix which is "publication" (like in "publication_date").
# ...then parse all the "date" fields, to parse it into separate elements
# ...then parse all the "date" fields, to parse it into separate elements
prefixes
=
[
key
[:
-
5
]
for
key
in
hyperdata
.
keys
()
if
key
[
-
5
:]
==
"_date"
]
prefixes
=
[
key
[:
-
5
]
for
key
in
hyperdata
.
keys
()
if
key
[
-
5
:]
==
"_date"
]
for
prefix
in
prefixes
:
for
prefix
in
prefixes
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment