Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
c1a02b29
Commit
c1a02b29
authored
Apr 17, 2015
by
PkSM3
Browse files
Options
Browse Files
Download
Plain Diff
[UPDATE] merge conflict solved? (pubmed server down)
parents
92cad524
4e74b74e
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
88 additions
and
103 deletions
+88
-103
celery.py
gargantext_web/celery.py
+8
-1
settings.py
gargantext_web/settings.py
+1
-1
urls.py
gargantext_web/urls.py
+2
-1
views.py
gargantext_web/views.py
+6
-4
views_optimized.py
gargantext_web/views_optimized.py
+14
-2
backupdb.py
init/backupdb.py
+0
-0
init_accounts.py
init/init_accounts.py
+0
-0
test_db.py
init/test_db.py
+0
-0
MedlineFetcherDavid2015.py
scrappers/scrap_pubmed/MedlineFetcherDavid2015.py
+0
-0
__init__.py
scrappers/scrap_pubmed/__init__.py
+0
-0
admin.py
scrappers/scrap_pubmed/admin.py
+0
-0
models.py
scrappers/scrap_pubmed/models.py
+0
-0
tests.py
scrappers/scrap_pubmed/tests.py
+0
-0
views.py
scrappers/scrap_pubmed/views.py
+51
-85
corpus.html
templates/corpus.html
+1
-1
project.html
templates/project.html
+5
-8
No files found.
gargantext_web/celery.py
View file @
c1a02b29
...
@@ -58,7 +58,7 @@ def apply_workflow(corpus_id):
...
@@ -58,7 +58,7 @@ def apply_workflow(corpus_id):
# With Django ORM
# With Django ORM
corpus_django
=
models
.
Node
.
objects
.
get
(
id
=
corpus_id
)
corpus_django
=
models
.
Node
.
objects
.
get
(
id
=
corpus_id
)
corpus_django
.
metadata
[
'Processing'
]
=
0
corpus_django
.
metadata
[
'Processing'
]
=
"2"
corpus_django
.
save
()
corpus_django
.
save
()
print
(
"-"
*
60
)
print
(
"-"
*
60
)
...
@@ -73,5 +73,12 @@ def apply_workflow(corpus_id):
...
@@ -73,5 +73,12 @@ def apply_workflow(corpus_id):
extract_ngrams
(
corpus
,
[
'title'
])
extract_ngrams
(
corpus
,
[
'title'
])
compute_tfidf
(
corpus
)
compute_tfidf
(
corpus
)
try
:
corpus_django
.
metadata
[
'Processing'
]
=
0
corpus_django
.
save
()
except
Exception
as
error
:
print
(
error
)
gargantext_web/settings.py
View file @
c1a02b29
...
@@ -97,7 +97,7 @@ INSTALLED_APPS = (
...
@@ -97,7 +97,7 @@ INSTALLED_APPS = (
'cte_tree'
,
'cte_tree'
,
'node'
,
'node'
,
'ngram'
,
'ngram'
,
'scrap_pubmed'
,
'scrap
pers.scrap
_pubmed'
,
'djcelery'
,
'djcelery'
,
'aldjemy'
,
'aldjemy'
,
'rest_framework'
,
'rest_framework'
,
...
...
gargantext_web/urls.py
View file @
c1a02b29
...
@@ -6,7 +6,7 @@ from django.contrib.auth.views import login
...
@@ -6,7 +6,7 @@ from django.contrib.auth.views import login
from
gargantext_web
import
views
,
views_optimized
from
gargantext_web
import
views
,
views_optimized
import
gargantext_web.api
import
gargantext_web.api
import
scrap_pubmed.views
as
pubmedscrapper
import
scrap
pers.scrap
_pubmed.views
as
pubmedscrapper
admin
.
autodiscover
()
admin
.
autodiscover
()
...
@@ -102,6 +102,7 @@ if settings.MAINTENANCE:
...
@@ -102,6 +102,7 @@ if settings.MAINTENANCE:
url
(
r'^$'
,
views
.
home_view
),
url
(
r'^$'
,
views
.
home_view
),
url
(
r'^about/'
,
views
.
get_about
),
url
(
r'^about/'
,
views
.
get_about
),
url
(
r'^admin/'
,
include
(
admin
.
site
.
urls
)),
url
(
r'^.*'
,
views
.
get_maintenance
),
url
(
r'^.*'
,
views
.
get_maintenance
),
)
)
...
...
gargantext_web/views.py
View file @
c1a02b29
...
@@ -39,7 +39,7 @@ from django.template import RequestContext
...
@@ -39,7 +39,7 @@ from django.template import RequestContext
from
django.contrib.auth.decorators
import
login_required
from
django.contrib.auth.decorators
import
login_required
from
django.contrib.auth
import
authenticate
,
login
,
logout
from
django.contrib.auth
import
authenticate
,
login
,
logout
from
scrap_pubmed.admin
import
Logger
from
scrap
pers.scrap
_pubmed.admin
import
Logger
from
gargantext_web.db
import
*
from
gargantext_web.db
import
*
...
@@ -259,8 +259,8 @@ def corpus(request, project_id, corpus_id):
...
@@ -259,8 +259,8 @@ def corpus(request, project_id, corpus_id):
return
redirect
(
'/login/?next=
%
s'
%
request
.
path
)
return
redirect
(
'/login/?next=
%
s'
%
request
.
path
)
try
:
try
:
offset
=
str
(
project_id
)
offset
=
int
(
project_id
)
offset
=
str
(
corpus_id
)
offset
=
int
(
corpus_id
)
except
ValueError
:
except
ValueError
:
raise
Http404
()
raise
Http404
()
...
@@ -289,8 +289,10 @@ def corpus(request, project_id, corpus_id):
...
@@ -289,8 +289,10 @@ def corpus(request, project_id, corpus_id):
try
:
try
:
processing
=
corpus
.
metadata
[
'Processing'
]
processing
=
corpus
.
metadata
[
'Processing'
]
except
:
except
Exception
as
error
:
print
(
error
)
processing
=
0
processing
=
0
print
(
'processing'
,
processing
)
html
=
t
.
render
(
Context
({
\
html
=
t
.
render
(
Context
({
\
'user'
:
user
,
\
'user'
:
user
,
\
...
...
gargantext_web/views_optimized.py
View file @
c1a02b29
import
os
from
django.shortcuts
import
redirect
from
django.shortcuts
import
redirect
from
django.shortcuts
import
render
from
django.shortcuts
import
render
from
django.http
import
Http404
,
HttpResponse
,
HttpResponseRedirect
,
HttpResponseForbidden
from
django.http
import
Http404
,
HttpResponse
,
HttpResponseRedirect
,
HttpResponseForbidden
...
@@ -14,6 +17,7 @@ from node.admin import CustomForm
...
@@ -14,6 +17,7 @@ from node.admin import CustomForm
from
gargantext_web.db
import
*
from
gargantext_web.db
import
*
from
gargantext_web.settings
import
DEBUG
,
MEDIA_ROOT
from
gargantext_web.settings
import
DEBUG
,
MEDIA_ROOT
from
gargantext_web.api
import
JsonHttpResponse
from
gargantext_web.api
import
JsonHttpResponse
import
json
import
json
import
re
import
re
...
@@ -134,7 +138,15 @@ def project(request, project_id):
...
@@ -134,7 +138,15 @@ def project(request, project_id):
)
)
session
.
add
(
corpus
)
session
.
add
(
corpus
)
session
.
commit
()
session
.
commit
()
# save the uploaded file
# If user is new, folder does not exist yet, create it then
dirpath
=
'
%
s/corpora/
%
s'
%
(
MEDIA_ROOT
,
request
.
user
.
username
)
if
not
os
.
path
.
exists
(
dirpath
):
print
(
"Creating folder
%
s"
%
dirpath
)
os
.
makedirs
(
dirpath
)
# Save the uploaded file
filepath
=
'
%
s/corpora/
%
s/
%
s'
%
(
MEDIA_ROOT
,
request
.
user
.
username
,
thefile
.
_name
)
filepath
=
'
%
s/corpora/
%
s/
%
s'
%
(
MEDIA_ROOT
,
request
.
user
.
username
,
thefile
.
_name
)
f
=
open
(
filepath
,
'wb'
)
f
=
open
(
filepath
,
'wb'
)
f
.
write
(
thefile
.
read
())
f
.
write
(
thefile
.
read
())
...
@@ -158,7 +170,7 @@ def project(request, project_id):
...
@@ -158,7 +170,7 @@ def project(request, project_id):
print
(
error
)
print
(
error
)
# redirect to the main project page
# redirect to the main project page
# TODO need to wait before response (need corpus update)
# TODO need to wait before response (need corpus update)
sleep
(
1
)
sleep
(
2
)
return
HttpResponseRedirect
(
'/project/'
+
str
(
project_id
))
return
HttpResponseRedirect
(
'/project/'
+
str
(
project_id
))
else
:
else
:
print
(
'ERROR: BAD FORM'
)
print
(
'ERROR: BAD FORM'
)
...
...
backupdb.py
→
init/
backupdb.py
View file @
c1a02b29
File moved
init_accounts.py
→
init
/init
_accounts.py
View file @
c1a02b29
File moved
test_db.py
→
init/
test_db.py
View file @
c1a02b29
File moved
scrap_pubmed/MedlineFetcherDavid2015.py
→
scrap
pers/scrap
_pubmed/MedlineFetcherDavid2015.py
View file @
c1a02b29
File moved
scrap_pubmed/__init__.py
→
scrap
pers/scrap
_pubmed/__init__.py
View file @
c1a02b29
File moved
scrap_pubmed/admin.py
→
scrap
pers/scrap
_pubmed/admin.py
View file @
c1a02b29
File moved
scrap_pubmed/models.py
→
scrap
pers/scrap
_pubmed/models.py
View file @
c1a02b29
File moved
scrap_pubmed/tests.py
→
scrap
pers/scrap
_pubmed/tests.py
View file @
c1a02b29
File moved
scrap_pubmed/views.py
→
scrap
pers/scrap
_pubmed/views.py
View file @
c1a02b29
...
@@ -3,7 +3,7 @@ from django.template.loader import get_template
...
@@ -3,7 +3,7 @@ from django.template.loader import get_template
from
django.template
import
Context
from
django.template
import
Context
from
django.contrib.auth.models
import
User
,
Group
from
django.contrib.auth.models
import
User
,
Group
from
scrap_pubmed.MedlineFetcherDavid2015
import
MedlineFetcher
from
scrap
pers.scrap
_pubmed.MedlineFetcherDavid2015
import
MedlineFetcher
from
urllib.request
import
urlopen
,
urlretrieve
from
urllib.request
import
urlopen
,
urlretrieve
import
json
import
json
...
@@ -184,31 +184,7 @@ def testISTEX(request , project_id):
...
@@ -184,31 +184,7 @@ def testISTEX(request , project_id):
print
(
request
.
method
)
print
(
request
.
method
)
alist
=
[
"bar"
,
"foo"
]
alist
=
[
"bar"
,
"foo"
]
# SQLAlchemy session
session
=
Session
()
# do we have a valid project id?
try
:
project_id
=
int
(
project_id
)
except
ValueError
:
raise
Http404
()
# do we have a valid project?
project
=
(
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
project_id
)
.
filter
(
Node
.
type_id
==
cache
.
NodeType
[
'Project'
]
.
id
)
)
.
first
()
if
project
is
None
:
raise
Http404
()
# do we have a valid user?
user
=
request
.
user
if
not
user
.
is_authenticated
():
return
redirect
(
'/login/?next=
%
s'
%
request
.
path
)
if
project
.
user_id
!=
user
.
id
:
return
HttpResponseForbidden
()
if
request
.
method
==
"POST"
:
if
request
.
method
==
"POST"
:
# print(alist)
# print(alist)
...
@@ -217,73 +193,63 @@ def testISTEX(request , project_id):
...
@@ -217,73 +193,63 @@ def testISTEX(request , project_id):
N
=
60
N
=
60
if
"query"
in
request
.
POST
:
query
=
request
.
POST
[
"query"
]
if
"query"
in
request
.
POST
:
query
=
request
.
POST
[
"query"
]
if
"string"
in
request
.
POST
:
query_string
=
request
.
POST
[
"string"
]
.
replace
(
" "
,
"+"
)
if
"string"
in
request
.
POST
:
query_string
=
request
.
POST
[
"string"
]
.
replace
(
" "
,
"+"
)
if
"N"
in
request
.
POST
:
N
=
int
(
request
.
POST
[
"N"
])
# if "N" in request.POST: N = request.POST["N"]
print
(
query_string
,
query
,
N
)
print
(
query_string
,
query
,
N
)
urlreqs
=
[]
#
urlreqs = []
pagesize
=
50
0
# pagesize = 5
0
tasks
=
MedlineFetcher
()
#
tasks = MedlineFetcher()
chunks
=
list
(
tasks
.
chunks
(
range
(
N
),
pagesize
))
#
chunks = list(tasks.chunks(range(N), pagesize))
for
k
in
chunks
:
#
for k in chunks:
if
(
k
[
0
]
+
pagesize
)
>
N
:
pagesize
=
N
-
k
[
0
]
#
if (k[0]+pagesize)>N: pagesize = N-k[0]
urlreqs
.
append
(
"http://api.istex.fr/document/?q="
+
query_string
+
"&output=*&"
+
"from="
+
str
(
k
[
0
])
+
"&size="
+
str
(
pagesize
))
#
urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
print
(
urlreqs
)
#
print(urlreqs)
# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
# print(urlreqs)
# print(urlreqs)
resourcetype
=
cache
.
ResourceType
[
"istex"
]
# resource_type = ResourceType.objects.get(name="istext" )
print
(
resourcetype
)
# corpus node instanciation as a Django model
# parent = Node.objects.get(id=project_id)
corpus
=
Node
(
# node_type = NodeType.objects.get(name='Corpus')
name
=
query
,
# type_id = NodeType.objects.get(name='Document').id
user_id
=
request
.
user
.
id
,
# user_id = User.objects.get( username=request.user ).id
parent_id
=
project_id
,
type_id
=
cache
.
NodeType
[
'Corpus'
]
.
id
,
# corpus = Node(
language_id
=
None
,
# user=request.user,
)
# parent=parent,
session
.
add
(
corpus
)
# type=node_type,
session
.
commit
()
# name=query,
# )
tasks
=
MedlineFetcher
()
for
i
in
range
(
8
):
# corpus.save()
t
=
threading
.
Thread
(
target
=
tasks
.
worker2
)
#thing to do
t
.
daemon
=
True
# thread dies when main thread (only non-daemon thread) exits.
# # configuring your queue with the event
t
.
start
()
# for i in range(8):
for
url
in
urlreqs
:
# t = threading.Thread(target=tasks.worker2) #thing to do
filename
=
MEDIA_ROOT
+
'/corpora/
%
s/
%
s'
%
(
request
.
user
,
str
(
datetime
.
datetime
.
now
()
.
isoformat
()))
# t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
tasks
.
q
.
put
(
[
url
,
filename
])
#put a task in th queue
# t.start()
tasks
.
q
.
join
()
# wait until everything is finished
# for url in urlreqs:
# filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
dwnldsOK
=
0
# tasks.q.put( [url , filename]) #put a task in th queue
for
filename
in
tasks
.
firstResults
:
# tasks.q.join() # wait until everything is finished
if
filename
!=
False
:
# for filename in tasks.firstResults:
print
(
filename
)
# corpus.add_resource( user=request.user, type=resource_type, file=filename )
# add the uploaded resource to the corpus
add_resource
(
corpus
,
user_id
=
request
.
user
.
id
,
# corpus.save()
type_id
=
resourcetype
.
id
,
# print("DEBUG:",DEBUG)
file
=
filename
,
# # do the WorkFlow
)
# try:
dwnldsOK
+=
1
# if DEBUG is True:
# corpus.workflow()
if
dwnldsOK
==
0
:
return
JsonHttpResponse
([
"fail"
])
# else:
# corpus.workflow.apply_async((), countdown=3)
try
:
def
apply_workflow
(
corpus
):
# return JsonHttpResponse(["workflow","finished"])
parse_resources
(
corpus
)
# except Exception as error:
extract_ngrams
(
corpus
,
[
'title'
])
# print(error)
compute_tfidf
(
corpus
)
if
DEBUG
:
apply_workflow
(
corpus
)
else
:
thread
=
threading
.
Thread
(
target
=
apply_workflow
,
args
=
(
corpus
,
),
daemon
=
True
)
thread
.
start
()
except
Exception
as
error
:
print
(
'WORKFLOW ERROR'
)
print
(
error
)
return
HttpResponseRedirect
(
'/project/'
+
str
(
project_id
))
data
=
[
query_string
,
query
,
N
]
data
=
[
query_string
,
query
,
N
]
return
JsonHttpResponse
(
data
)
return
JsonHttpResponse
(
data
)
...
...
templates/corpus.html
View file @
c1a02b29
...
@@ -131,7 +131,7 @@
...
@@ -131,7 +131,7 @@
<div
class=
"col-md-4"
>
<div
class=
"col-md-4"
>
<div
class=
"jumbotron"
>
<div
class=
"jumbotron"
>
{% if processing
=
= "1" %}
{% if processing
>
= "1" %}
<h3>
<img
width=
"20px"
src=
"{% static "
js
/
libs
/
img2
/
loading-bar
.
gif
"
%}"
></img>
Graph (later)
</h3>
<h3>
<img
width=
"20px"
src=
"{% static "
js
/
libs
/
img2
/
loading-bar
.
gif
"
%}"
></img>
Graph (later)
</h3>
{% else %}
{% else %}
<h3><a
href=
"/project/{{project.id}}/corpus/{{ corpus.id }}/explorer"
>
Graph
</a></h3>
<h3><a
href=
"/project/{{project.id}}/corpus/{{ corpus.id }}/explorer"
>
Graph
</a></h3>
...
...
templates/project.html
View file @
c1a02b29
...
@@ -277,10 +277,10 @@
...
@@ -277,10 +277,10 @@
function
bringDaNoise
()
{
function
bringDaNoise
()
{
var
theresults
=
$
(
"#theresults"
).
html
()
var
theresults
=
$
(
"#theresults"
).
html
()
if
(
theresults
&&
theresults
.
search
(
"No results"
)
==-
1
)
{
if
(
theresults
&&
theresults
.
search
(
"No results"
)
==-
1
)
{
var
origQuery
=
$
(
"#id_name"
).
val
()
console
.
log
(
"we've in dynamic mode"
)
console
.
log
(
"we've in dynamic mode"
)
$
(
"#simpleloader"
).
html
(
'<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>'
)
$
(
"#simpleloader"
).
html
(
'<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>'
)
$
(
"#submit_thing"
).
prop
(
'onclick'
,
null
);
$
(
"#submit_thing"
).
prop
(
'onclick'
,
null
);
var
theType
=
$
(
"#id_type option:selected"
).
html
();
var
theType
=
$
(
"#id_type option:selected"
).
html
();
if
(
theType
==
"Pubmed (xml format)"
)
doTheQuery
();
if
(
theType
==
"Pubmed (xml format)"
)
doTheQuery
();
if
(
theType
==
"istex"
)
{
if
(
theType
==
"istex"
)
{
...
@@ -301,13 +301,12 @@
...
@@ -301,13 +301,12 @@
$
(
"#id_form"
).
submit
();
$
(
"#id_form"
).
submit
();
}
}
}
}
}
}
}
}
function
getGlobalResults
(
value
){
function
getGlobalResults
(
value
){
console
.
log
(
"in getGlobalResults()"
)
console
.
log
(
"in getGlobalResults()"
)
console
.
log
(
"value:"
)
console
.
log
(
value
)
// AJAX to django
// AJAX to django
var
pubmedquery
=
$
(
"#id_name"
).
val
()
var
pubmedquery
=
$
(
"#id_name"
).
val
()
var
Npubs
=
$
(
"#id_N"
).
val
();
var
Npubs
=
$
(
"#id_N"
).
val
();
...
@@ -318,8 +317,6 @@
...
@@ -318,8 +317,6 @@
$
(
"#"
+
value
.
id
).
prop
(
'onclick'
,
null
);
$
(
"#"
+
value
.
id
).
prop
(
'onclick'
,
null
);
var
theType
=
$
(
"#id_type option:selected"
).
html
();
var
theType
=
$
(
"#id_type option:selected"
).
html
();
console
.
log
(
"theType:"
)
console
.
log
(
theType
)
if
(
theType
==
"Pubmed (xml format)"
)
{
if
(
theType
==
"Pubmed (xml format)"
)
{
$
.
ajax
({
$
.
ajax
({
...
@@ -357,7 +354,7 @@
...
@@ -357,7 +354,7 @@
});
});
}
}
if
(
theType
==
"istex"
)
{
if
(
theType
==
"istex
t
"
)
{
console
.
log
(
window
.
location
.
origin
+
"tests/istextquery"
)
console
.
log
(
window
.
location
.
origin
+
"tests/istextquery"
)
$
.
ajax
({
$
.
ajax
({
// contentType: "application/json",
// contentType: "application/json",
...
@@ -484,7 +481,7 @@
...
@@ -484,7 +481,7 @@
var
origQuery
=
query
var
origQuery
=
query
var
pubmedifiedQuery
=
{
query
:
query
,
string
:
query
,
N
:
Npubs
}
var
pubmedifiedQuery
=
{
query
:
query
,
string
:
query
}
// console.log(pubmedifiedQuery)
// console.log(pubmedifiedQuery)
var
projectid
=
window
.
location
.
href
.
split
(
"project"
)[
1
].
replace
(
/
\/
/g
,
''
)
//replace all the slashes
var
projectid
=
window
.
location
.
href
.
split
(
"project"
)[
1
].
replace
(
/
\/
/g
,
''
)
//replace all the slashes
...
@@ -500,7 +497,7 @@
...
@@ -500,7 +497,7 @@
success
:
function
(
data
)
{
success
:
function
(
data
)
{
console
.
log
(
"ajax_success: in testISTEX()"
)
console
.
log
(
"ajax_success: in testISTEX()"
)
console
.
log
(
data
)
console
.
log
(
data
)
location
.
reload
();
//
location.reload();
},
},
error
:
function
(
result
)
{
error
:
function
(
result
)
{
console
.
log
(
"in testISTEX(). Data not found"
);
console
.
log
(
"in testISTEX(). Data not found"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment