Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
a5a6ba76
Commit
a5a6ba76
authored
May 26, 2016
by
c24b
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
FIX CERN=>DOC PARSER
parent
49736e59
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
51 additions
and
58 deletions
+51
-58
constants.py
gargantext/constants.py
+6
-4
nodes.py
gargantext/models/nodes.py
+1
-0
__init__.py
gargantext/util/toolchain/__init__.py
+14
-20
projects.py
gargantext/views/pages/projects.py
+24
-28
terms.py
gargantext/views/pages/terms.py
+2
-2
istex.py
moissonneurs/istex.py
+2
-2
pubmed.py
moissonneurs/pubmed.py
+2
-2
No files found.
gargantext/constants.py
View file @
a5a6ba76
...
@@ -131,7 +131,9 @@ from gargantext.util.parsers import \
...
@@ -131,7 +131,9 @@ from gargantext.util.parsers import \
def
get_resource
(
corpus_type
):
def
get_resource
(
corpus_type
):
'''get ressources values for a given ressource_type id'''
'''get ressources values for a given ressource_type id'''
return
[
n
for
n
in
RESOURCES_TYPE
if
n
[
"type"
]
==
corpus_type
][
0
]
for
n
in
RESOURCETYPES
:
if
n
[
"type"
]
==
corpus_type
:
return
n
RESOURCETYPES
=
[
RESOURCETYPES
=
[
# type 0
# type 0
...
@@ -176,7 +178,7 @@ RESOURCETYPES = [
...
@@ -176,7 +178,7 @@ RESOURCETYPES = [
'accepted_formats'
:[
"zip"
,],
'accepted_formats'
:[
"zip"
,],
},
},
# type 6
# type 6
{
'type'
:
6
{
'type'
:
6
,
'name'
:
'Web of Science (ISI format)'
,
'name'
:
'Web of Science (ISI format)'
,
'parser'
:
ISIParser
,
'parser'
:
ISIParser
,
'default_language'
:
'en'
,
'default_language'
:
'en'
,
...
@@ -209,8 +211,8 @@ RESOURCETYPES = [
...
@@ -209,8 +211,8 @@ RESOURCETYPES = [
"parser"
:
CernParser
,
"parser"
:
CernParser
,
"default_language"
:
"en"
,
"default_language"
:
"en"
,
'accepted_formats'
:[
"zip"
,
"xml"
],
'accepted_formats'
:[
"zip"
,
"xml"
],
"scrapper"
:
CernScrapper
,
#~
"scrapper": CernScrapper,
"base_url"
:
"http://api.scoap3.org/search?"
,
#~
"base_url": "http://api.scoap3.org/search?",
},
},
]
]
...
...
gargantext/models/nodes.py
View file @
a5a6ba76
...
@@ -110,6 +110,7 @@ class Node(Base):
...
@@ -110,6 +110,7 @@ class Node(Base):
if
order
is
not
None
:
if
order
is
not
None
:
query
=
query
.
order_by
(
Node
.
name
)
query
=
query
.
order_by
(
Node
.
name
)
return
query
return
query
def
add_child
(
self
,
**
kwargs
):
def
add_child
(
self
,
**
kwargs
):
...
...
gargantext/util/toolchain/__init__.py
View file @
a5a6ba76
...
@@ -13,11 +13,11 @@ from .ngram_groups import compute_groups
...
@@ -13,11 +13,11 @@ from .ngram_groups import compute_groups
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
from
gargantext.models
import
Node
from
gargantext.models
import
Node
from
gargantext.util.files
import
check_format
,
upload
from
datetime
import
datetime
from
datetime
import
datetime
from
celery
import
shared_task
from
celery
import
shared_task
def
add_corpus
(
request
):
def
add_corpus
(
request
,
project
):
'''adding a new corpus into project corpus:
'''adding a new corpus into project corpus:
verifying two prerequisites before upload:
verifying two prerequisites before upload:
- file size can exceed UPLOAD_LIMIT set in constants
- file size can exceed UPLOAD_LIMIT set in constants
...
@@ -28,6 +28,10 @@ def add_corpus(request):
...
@@ -28,6 +28,10 @@ def add_corpus(request):
corpus_msg
=
None
corpus_msg
=
None
#Corpus est du type Node
#Corpus est du type Node
#print(corpus.__str__)
#print(corpus.__str__)
corpus
=
project
.
add_child
(
name
=
request
.
POST
[
'name'
],
typename
=
'CORPUS'
,
)
#get ressource type
#get ressource type
corpus_type
=
int
(
request
.
POST
[
'type'
])
corpus_type
=
int
(
request
.
POST
[
'type'
])
#corpus.type = int(request.POST['type'])
#corpus.type = int(request.POST['type'])
...
@@ -43,25 +47,15 @@ def add_corpus(request):
...
@@ -43,25 +47,15 @@ def add_corpus(request):
except
OSError
as
e
:
except
OSError
as
e
:
corpus_status
=
False
corpus_status
=
False
corpus_status_msg
=
str
(
e
)
corpus_status_msg
=
str
(
e
)
if
corpus_status
:
corpus
.
add_resource
(
corpus
.
add_resource
(
type
,
type
=
corpus_type
,
path
,
path
=
path
,
type
=
corpus_type
,
format
=
corpus_format
,
)
else
:
corpus
.
add_resource
(
type
,
path
,
type
=
corpus_type
,
format
=
corpus_format
,
status
=
corpus_status
,
status_message
=
corpus_status_msg
,
)
)
print
(
session
.
add
(
corpus
))
print
(
session
.
commit
())
session
.
add
(
corpus
)
return
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
session
.
commit
()
return
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus
.
id
)
.
first
()
#@shared_task
#@shared_task
def
parse_extract
(
corpus
):
def
parse_extract
(
corpus
):
...
...
gargantext/views/pages/projects.py
View file @
a5a6ba76
...
@@ -94,11 +94,8 @@ def project(request, project_id):
...
@@ -94,11 +94,8 @@ def project(request, project_id):
# add a new corpus into Node Project > Node Corpus > Ressource
# add a new corpus into Node Project > Node Corpus > Ressource
if
request
.
method
==
'POST'
:
if
request
.
method
==
'POST'
:
corpus
=
project
.
add_child
(
name
=
request
.
POST
[
'name'
],
corpus
=
add_corpus
(
request
,
project
)
typename
=
'CORPUS'
,
)
corpus
=
add_corpus
(
request
)
if
corpus
.
status
:
if
corpus
.
status
:
# parse_extract: fileparsing -> ngram extraction -> lists
# parse_extract: fileparsing -> ngram extraction -> lists
...
@@ -119,32 +116,31 @@ def project(request, project_id):
...
@@ -119,32 +116,31 @@ def project(request, project_id):
for
corpus
in
corpora
:
for
corpus
in
corpora
:
# we only consider the first resource of the corpus to determine its type
# we only consider the first resource of the corpus to determine its type
resources
=
corpus
.
resources
()
resources
=
corpus
.
resources
()
if
len
(
resources
):
if
len
(
resources
)
>
0
:
resource
=
resources
[
0
]
resource
=
resources
[
0
]
resource
=
get_resource
(
resource
[
"type"
])
##here map from RESSOURCES_TYPES_ID and NOT NAME
##here map from RESSOURCES_TYPES_ID and NOT NAME
resource_type_name
=
RESOURCETYPES
[
resource
[
'type'
]][
'name'
]
resource_type_name
=
resource
[
'name'
]
resource_type_accepted_formats
=
RESOURCETYPES
[
resource
[
'type'
]][
'accepted_formats'
]
resource_type_accepted_formats
=
resource
[
'accepted_formats'
]
else
:
print
(
"(WARNING) PROJECT view: no listed resource or one of the corpus has an invalid type"
)
# add some data for the viewer
corpus
.
count
=
corpus
.
children
(
'DOCUMENT'
)
.
count
()
# add some data for the viewer
status
=
corpus
.
status
()
corpus
.
count
=
corpus
.
children
(
'DOCUMENT'
)
.
count
()
if
status
is
not
None
and
not
status
[
'complete'
]:
status
=
corpus
.
status
()
if
not
status
[
'error'
]:
if
status
is
not
None
and
not
status
[
'complete'
]:
corpus
.
status_message
=
'(in progress:
%
s,
%
d complete)'
%
(
if
not
status
[
'error'
]:
status
[
'action'
]
.
replace
(
'_'
,
' '
),
corpus
.
status_message
=
'(in progress:
%
s,
%
d complete)'
%
(
status
[
'progress'
],
status
[
'action'
]
.
replace
(
'_'
,
' '
),
)
status
[
'progress'
],
else
:
)
corpus
.
status_message
=
'(aborted: "
%
s" after
%
i docs)'
%
(
status
[
'error'
][
-
1
],
status
[
'progress'
]
)
else
:
else
:
corpus
.
status_message
=
'(aborted: "
%
s" after
%
i docs)'
%
(
corpus
.
status_message
=
''
status
[
'error'
][
-
1
],
# add
status
[
'progress'
]
sourcename2corpora
[
resource_type_name
]
.
append
(
corpus
)
)
else
:
corpus
.
status_message
=
''
# add
sourcename2corpora
[
resource_type_name
]
.
append
(
corpus
)
# source & their respective counts
# source & their respective counts
total_documentscount
=
0
total_documentscount
=
0
sourcename2documentscount
=
defaultdict
(
int
)
sourcename2documentscount
=
defaultdict
(
int
)
...
...
gargantext/views/pages/terms.py
View file @
a5a6ba76
...
@@ -2,7 +2,7 @@ from gargantext.util.http import requires_auth, render, settings
...
@@ -2,7 +2,7 @@ from gargantext.util.http import requires_auth, render, settings
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
from
gargantext.util.db_cache
import
cache
from
gargantext.util.db_cache
import
cache
from
gargantext.models
import
Node
from
gargantext.models
import
Node
from
gargantext.constants
import
resourcenam
e
from
gargantext.constants
import
get_resourc
e
from
datetime
import
datetime
from
datetime
import
datetime
@
requires_auth
@
requires_auth
...
@@ -32,7 +32,7 @@ def ngramtable(request, project_id, corpus_id):
...
@@ -32,7 +32,7 @@ def ngramtable(request, project_id, corpus_id):
'date'
:
datetime
.
now
(),
'date'
:
datetime
.
now
(),
'project'
:
project
,
'project'
:
project
,
'corpus'
:
corpus
,
'corpus'
:
corpus
,
'resourcename'
:
resourcename
(
corpus
)
,
'resourcename'
:
get_ressource
(
corpus
)[
"name"
]
,
'view'
:
'terms'
'view'
:
'terms'
},
},
)
)
moissonneurs/istex.py
View file @
a5a6ba76
...
@@ -8,7 +8,7 @@ from traceback import print_tb
...
@@ -8,7 +8,7 @@ from traceback import print_tb
from
django.shortcuts
import
redirect
,
render
from
django.shortcuts
import
redirect
,
render
from
django.http
import
Http404
,
HttpResponseRedirect
,
HttpResponseForbidden
from
django.http
import
Http404
,
HttpResponseRedirect
,
HttpResponseForbidden
from
gargantext.constants
import
resourcetype
,
QUERY_SIZE_N_MAX
from
gargantext.constants
import
QUERY_SIZE_N_MAX
from
gargantext.models.nodes
import
Node
from
gargantext.models.nodes
import
Node
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.util.http
import
JsonHttpResponse
...
@@ -133,7 +133,7 @@ def save(request , project_id):
...
@@ -133,7 +133,7 @@ def save(request , project_id):
if
filename
!=
False
:
if
filename
!=
False
:
# add the uploaded resource to the corpus
# add the uploaded resource to the corpus
corpus
.
add_resource
(
corpus
.
add_resource
(
type
=
resourcetype
(
'ISTex'
)
type
=
9
,
path
=
filename
,
path
=
filename
)
)
dwnldsOK
+=
1
dwnldsOK
+=
1
...
...
moissonneurs/pubmed.py
View file @
a5a6ba76
...
@@ -18,7 +18,7 @@ from traceback import print_tb
...
@@ -18,7 +18,7 @@ from traceback import print_tb
from
django.shortcuts
import
redirect
from
django.shortcuts
import
redirect
from
django.http
import
Http404
,
HttpResponseRedirect
,
HttpResponseForbidden
from
django.http
import
Http404
,
HttpResponseRedirect
,
HttpResponseForbidden
from
gargantext.constants
import
resourcetyp
e
,
QUERY_SIZE_N_MAX
from
gargantext.constants
import
get_resourc
e
,
QUERY_SIZE_N_MAX
from
gargantext.models.nodes
import
Node
from
gargantext.models.nodes
import
Node
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
from
gargantext.util.db_cache
import
cache
from
gargantext.util.db_cache
import
cache
...
@@ -134,7 +134,7 @@ def save( request , project_id ) :
...
@@ -134,7 +134,7 @@ def save( request , project_id ) :
print
(
filename
)
print
(
filename
)
if
filename
!=
False
:
if
filename
!=
False
:
# add the uploaded resource to the corpus
# add the uploaded resource to the corpus
corpus
.
add_resource
(
type
=
resourcetype
(
'Pubmed (XML format)'
)
corpus
.
add_resource
(
type
=
4
,
path
=
filename
,
path
=
filename
,
url
=
None
,
url
=
None
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment