humanities / gargantext · Commits

Commit 94d46cce, authored Sep 08, 2016 by delanoe

Merge remote-tracking branch 'origin/c24b-testing-cern-crawler' into testing-merge

parents bf514b3e d1f79fed
Showing 5 changed files with 293 additions and 260 deletions (+293 −260)
gargantext/util/crawlers/CERN.py       +47  −62
gargantext/util/crawlers/_Crawler.py    +1   −1
moissonneurs/cern.py                  +102 −165
moissonneurs/urls.py                    +3   −7
templates/pages/projects/project.html +140  −25
gargantext/util/crawlers/CERN.py  (+47 −62)

...
@@ -5,28 +5,31 @@
 # ****************************
 # Author: c24b
 # Date: 27/05/2015
+from ._Crawler import Crawler
 import hmac, hashlib
 import requests
 import os
 import random
 import urllib.parse as uparse
 from lxml import etree
 from gargantext.settings import API_TOKENS
-#from gargantext.util.files import build_corpus_path
-from ._Crawler import Crawler
-from gargantext.util.db import session
-from gargantext.util.timeit_damnit import timing
-from gargantext.models import Node

 class CernCrawler(Crawler):
     '''CERN SCOAP3 API Interaction'''

+    def __init__(self):
+        API = API_TOKENS["CERN"]
+        self.apikey = API["APIKEY"].encode("utf-8")
+        self.secret = bytearray(API["APISECRET"].encode("utf-8"))
+        self.BASE_URL = u"http://api.scoap3.org/search?"
+
     def __generate_signature__(self, url):
         '''creation of the signature'''
         #hmac-sha1 salted with secret
-        return hmac.new(self.secret, url, hashlib.sha1).hexdigest()
+        return hmac.new(self.secret, url.encode("utf-8"), hashlib.sha1).hexdigest()

     def __format_query__(self, query, of="xm", fields=None):
         ''' for query filters params
...
@@ -45,89 +48,71 @@ class CernCrawler(Crawler):
     def __format_url__(self, dict_q):
         '''format the url with encoded query'''
-        #add the apikey
-        dict_q["apikey"] = [self.apikey]
-        params = "&".join([(str(k) + "=" + str(uparse.quote(v[0]))) for k, v in sorted(dict_q.items())])
+        #add the apikey at the end
+        dict_q["apikey"] = self.apikey
+        #dict_q["p"] = dict_q["p"].replace(" ", "+") >> quote_plus
+        params = ("&").join([(str(k) + "=" + uparse.quote_plus(v)) for k, v in sorted(dict_q.items())])
         return self.BASE_URL + params

     def sign_url(self, dict_q):
         '''add signature'''
-        API = API_TOKENS["CERN"]
-        self.apikey = API["APIKEY"]
-        self.secret = API["APISECRET"].encode("utf-8")
-        self.BASE_URL = u"http://api.scoap3.org/search?"
         url = self.__format_url__(dict_q)
-        return url + "&signature=" + self.__generate_signature__(url.encode("utf-8"))
+        return url + "&signature=" + self.__generate_signature__(url)

-    @timing
-    def create_corpus(self):
-        #create a corpus
-        corpus = Node(name = self.query,
-                      #user_id = self.user_id,
-                      parent_id = self.project_id,
-                      typename = 'CORPUS',
-                      hyperdata = {"action" : "Scrapping data",
-                                   "language_id" : self.type["default_language"]}
-                      )
-        #add the resource
-        corpus.add_resource(type = self.type["type"],
-                            name = self.type["name"],
-                            path = self.path)
-        try:
-            print("PARSING")
-            # p = eval(self.type["parser"])()
-            session.add(corpus)
-            session.commit()
-            self.corpus_id = corpus.id
-            parse_extract_indexhyperdata(corpus.id)
-            return self
-        except Exception as error:
-            print('WORKFLOW ERROR')
-            print(error)
-            session.rollback()
-            return self

-    def download(self):
+    def download(self, query):
+        import time
         self.path = "/tmp/results.xml"
-        query = self.__format_query__(self.query)
+        query = self.__format_query__(query)
         url = self.sign_url(query)
+        start = time.time()
         r = requests.get(url, stream=True)
         downloaded = False
         #the long part
         with open(self.path, 'wb') as f:
             print("Downloading file")
             for chunk in r.iter_content(chunk_size=1024):
                 if chunk: # filter out keep-alive new chunks
                     #print("===")
                     f.write(chunk)
                     downloaded = True
+        end = time.time()
+        #print (">>>>>>>>>>LOAD results", end-start)
         return downloaded

+    def get_ids(self, query):
+        '''get results nb + individual ids of search query'''
+        dict_q = uparse.parse_qs(query)
+        #parameters for a global request
+        dict_q["p"] = query
+        dict_q["of"] = "id"
+        dict_q["rg"] = "10000"
+        #api key is added when formatting url
+        url = self.__format_url__(dict_q)
+        signed_url = url + "&signature=" + self.__generate_signature__(url)
+        r = requests.get(signed_url)
+        print(signed_url)
+        self.ids = r.json()
+        #self.results_nb = len(self.ids)
+        #self.generate_urls()
+        return(self.ids)

+    def generate_urls(self):
+        '''generate raw urls of ONE record'''
+        self.urls = ["http://repo.scoap3.org/record/%i/export/xm?ln=en" % rid for rid in self.ids]
+        return self.urls

+    def fetch_records(self, ids):
+        '''for NEXT time'''
+        raise NotImplementedError

-    def scan_results(self):
-        '''scan the number of results by fetching a single result
+    def scan_results(self, query):
+        '''[OLD] scan the number of results by fetching a single result
         which displays only the author of page 1;
         we grab the comment at the top of the page
         '''
-        import time
         self.results_nb = 0
-        query = self.__format_query__(self.query, of="hb")
+        query = self.__format_query__(query, of="hb")
         query["ot"] = "100"
         query["jrec"] = '1'
         query["rg"] = '1'
         url = self.sign_url(query)
-        print(url)
+        # print(url)
         #start = time.time()
         r = requests.get(url)
         #end = time.time()
...
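The request signing is the heart of this file: __format_url__ builds a canonical query string (sorted keys, quote_plus-encoded values, apikey appended) and __generate_signature__ seals the whole URL with an HMAC-SHA1 hexdigest. A minimal, self-contained sketch of that recipe, with made-up credentials standing in for API_TOKENS["CERN"]:

import hashlib, hmac
import urllib.parse as uparse

# made-up credentials; the real values come from API_TOKENS["CERN"] in settings
APIKEY = b"my-api-key"
SECRET = bytearray(b"my-api-secret")
BASE_URL = "http://api.scoap3.org/search?"

def signed_search_url(dict_q):
    # same recipe as CernCrawler.__format_url__ + sign_url: sort the params,
    # quote_plus each value, then append an HMAC-SHA1 of the full URL
    dict_q["apikey"] = APIKEY
    params = "&".join(str(k) + "=" + uparse.quote_plus(v)
                      for k, v in sorted(dict_q.items()))
    url = BASE_URL + params
    signature = hmac.new(SECRET, url.encode("utf-8"), hashlib.sha1).hexdigest()
    return url + "&signature=" + signature

print(signed_search_url({"p": "higgs boson", "of": "id", "rg": "10000"}))

Sorting the parameters before signing matters: the server recomputes the HMAC over the exact byte string it receives, so both sides must agree on parameter order.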
gargantext/util/crawlers/_Crawler.py  (+1 −1)

...
@@ -101,7 +101,7 @@ class Crawler:
             )
             session.add(corpus)
             session.commit()
-            scheduled(parse_extract_indexhyperdata)(corpus.id)
+            scheduled(parse_extract_indexhyperdata(corpus.id))
         else:
             #add the resource
             corpus.add_resource(
...
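This one-line change is easy to misread: scheduled(f)(corpus.id) hands the function itself to the scheduler and invokes the returned wrapper, while scheduled(f(corpus.id)) runs f immediately and wraps whatever it returns. A hypothetical stand-in for gargantext.util.scheduling.scheduled makes the difference visible:

# hypothetical stand-in for gargantext.util.scheduling.scheduled,
# just to show the difference between the two call shapes in this hunk
def scheduled(func):
    def postpone(*args, **kwargs):
        print("queueing %s%r" % (func.__name__, args))
        return func(*args, **kwargs)   # a real scheduler would enqueue this
    return postpone

def parse_extract_indexhyperdata(corpus_id):
    print("processing corpus", corpus_id)

scheduled(parse_extract_indexhyperdata)(42)   # passes the function; the call is deferrable
scheduled(parse_extract_indexhyperdata(42))   # runs it on the spot, schedules its result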
moissonneurs/cern.py  (+102 −165)

 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # ****************************
-# ***** CERN Scrapper   *****
+# ***** CERN Crawler    *****
 # ****************************
+RESOURCE_TYPE_SCOAP = 9
+
 import logging
 from django.shortcuts import redirect, render
 from django.http import Http404, HttpResponseRedirect, HttpResponseForbidden
-from logging.handlers import RotatingFileHandler
-# create the logger object we will use to write to the logs
-logger = logging.getLogger()
-# set the logger level to DEBUG so that it writes everything
-logger.setLevel(logging.DEBUG)
-# create a formatter that will add the time and the level
-# of each message written to the log
-formatter = logging.Formatter('%(asctime)s :: %(levelname)s :: %(message)s')
-# create a handler that redirects log writes to a file
-# in 'append' mode, with 1 backup and a max size of 1MB
-#>>> Permission denied: conflicts with the django logs
-#file_handler = RotatingFileHandler('.activity.log', 'a', 1000000, 1)
-# set its level to DEBUG, tell it to use the formatter
-# created above, and add this handler to the logger
-#~ file_handler.setLevel(logging.DEBUG)
-#~ file_handler.setFormatter(formatter)
-#~ logger.addHandler(file_handler)
-# create a second handler that redirects each log write
-# to the console
-steam_handler = logging.StreamHandler()
-steam_handler.setLevel(logging.DEBUG)
-logger.addHandler(steam_handler)
 import json
 import datetime
 from os import path
 import threading
-import hmac, hashlib
-import requests
-import lxml
-import subprocess
-import urllib.parse as uparse
-from lxml import etree
-from bs4 import BeautifulSoup, Comment
-from collections import defaultdict
-#from gargantext.util.files import download
-from gargantext.settings import API_TOKENS as API
-#from private import API_PERMISSIONS
+from gargantext.constants import get_resource, load_crawler, QUERY_SIZE_N_MAX
+from gargantext.models.nodes import Node
+from gargantext.util.db import session
+from gargantext.util.db_cache import cache
+from gargantext.util.http import JsonHttpResponse
+from gargantext.util.scheduling import scheduled
+from gargantext.util.toolchain import parse_extract_indexhyperdata

-def save(request, project_id):
-    try:
-        project_id = int(project_id)
-    except ValueError:
-        raise Http404()
-    # do we have a valid project?
-    project = session.query(Node).filter(Node.id == project_id).first()
-    if project is None:
-        raise Http404()
-    user = cache.User[request.user.id]
-    if not user.owns(project):
-        raise HttpResponseForbidden()
-    if request.method == "POST":
-        query = request.POST["query"]
-        name = request.POST["string"]
-        corpus = project.add_child(name=name, typename="CORPUS")
-        corpus.add_resource(type=resourcetype('Cern (MARC21 XML)'),
-                            path=filename, url=None)
-        print("Adding the resource")

-def query(request):
-    print(request.method)
-    alist = []
+def query(request):
+    '''get GlobalResults()'''
     if request.method == "POST":
         query = request.POST["query"]
+        N = int(request.POST["N"])
+        source = get_resource(RESOURCE_TYPE_SCOAP)
+        if source["crawler"] is not None:
+            crawlerbot = load_crawler(source)()
+            #old raw way to get results_nb
+            #results = crawlerbot.scan_results(query)
+            ids = crawlerbot.get_ids(query)
+            return JsonHttpResponse({"results_nb": int(len(ids)), "ids": ids})

+def save(request, project_id):
+    '''save'''
     if request.method == "POST":
+        query = request.POST.get("query")
+        try:
+            N = int(request.POST.get("N"))
+        except:
+            N = 0
+        print(query, N)
+        #for next time
+        #ids = request.POST["ids"]
+        source = get_resource(RESOURCE_TYPE_SCOAP)
+        if N == 0:
+            raise Http404()
         if N > QUERY_SIZE_N_MAX:
             msg = "Invalid sample size N = %i (max = %i)" % (N, QUERY_SIZE_N_MAX)
             N = QUERY_SIZE_N_MAX
             print("ERROR(scrap: pubmed stats): ", msg)
             raise ValueError(msg)
         try:
             project_id = int(project_id)
             print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " query =", query)
         except ValueError:
             print("LOG::TIME:_ " + datetime.datetime.now().isoformat() + " N =", N)
             raise Http404()
         #Here Requests API
         # do we have a valid project?
         project = session.query(Node).filter(Node.id == project_id).first()
         #API_TOKEN = API["CERN"]
         if project is None:
             raise Http404()
         #instancia = Scraper()
         user = cache.User[request.user.id]
         if not user.owns(project):
             # serialFetcher (n_last_years, query, query_size)
             raise HttpResponseForbidden()
         #alist = instancia.serialFetcher( 5, query , N )
         # corpus node instantiation as a Django model
-        data = alist
         corpus = Node(name=query,
                       user_id=request.user.id,
                       parent_id=project_id,
                       typename='CORPUS',
                       hyperdata={"action": "Scrapping data",
                                  "language_id": "en"})
         #download_file
         crawler_bot = load_crawler(source)()
         #for now no way to force downloading X records
         #the long running command
         filename = crawler_bot.download(query)
         corpus.add_resource(type=source["type"]
                             #, name = source["name"]
                             , path=crawler_bot.path)
         session.add(corpus)
         session.commit()
         #corpus_id = corpus.id
         try:
             scheduled(parse_extract_indexhyperdata)(corpus.id)
         except Exception as error:
             print('WORKFLOW ERROR')
             print(error)
             try:
                 print_tb(error.__traceback__)
             except:
                 pass
             # IMPORTANT ---------------------------------
             # sanitize session after interrupted transact
             session.rollback()
             # --------------------------------------------
         return render(
             template_name='pages/projects/wait.html',
             request=request,
             context={'user': request.user, 'project': project},
         )
-    data = [query_string, query, N]
-    print(data)
-    return JsonHttpResponse(data)
     return JsonHttpResponse(data)

-class CERN_API(object):
-    '''CERN SCOAP3 Interaction'''
-    def __init__(self, query, filename="./results.xml"):
-        self.query = query
-        self.apikey = API["TOKEN"]
-        self.secret = API["SECRET"].encode("utf-8")
-        self.results = self.get_results(filename)
-        self.BASE_URL = u"http://api.scoap3.org/search?"
-
-    def __generate_signature__(self, url):
-        '''creation of the signature'''
-        #hmac-sha1 salted with secret
-        return hmac.new(self.secret, url, hashlib.sha1).hexdigest()
-
-    def __format_url__(self):
-        '''format the url with encoded query'''
-        dict_q = uparse.parse_qs(self.query)
-        #add the apikey
-        dict_q["apikey"] = [self.apikey]
-        params = "&".join([(str(k) + "=" + str(uparse.quote(v[0]))) for k, v in sorted(dict_q.items())])
-        return self.BASE_URL + params
-
-    def sign_url(self):
-        '''add signature'''
-        url = self.__format_url__()
-        return url + "&signature=" + self.__generate_signature__(url.encode("utf-8"))
-
-    def get_results(self, filename):
-        url = self.sign_url()
-        r = requests.get(url, stream=True)
-        with open(filename, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk: # filter out keep-alive new chunks
-                    f.write(chunk)
-        return filename
-
-    def parse_xml(filename, MARCXML):
-        parser = etree.XMLParser()
-        with open(self.filename, 'r') as f:
-            root = etree.tostring(f.read())
-            data = f.read()
-            records = []
-            for record in data.split("<record>")[1:]:
-                soup = BeautifulSoup("<record>" + record, "lxml")
-                r = {v: [] for v in self.MARC21["700"].values()}
-                r["uid"] = soup.find("controlfield").text
-                for data in soup.find_all("datafield"):
-                    tag = data.get("tag")
-                    if tag in self.MARC21.keys():
-                        for sub in data.find_all("subfield"):
-                            code = sub.get("code")
-                            if code in self.MARC21[tag].keys():
-                                if tag == "700":
-                                    r[self.MARC21[tag][code]].append(sub.text)
-                                else:
-                                    r[self.MARC21[tag][code]] = sub.text
-                records.append(r.decode('utf-8'))
-        return JsonHttpResponse(records)

-#query="of=xm"
-#a = CERN_API(query, "./full.xml")
-#p = CERNParser("./full.xml")
-#print(p.MARC21.keys())
-#~ #p.parse()
-#~ with open("./results_full.json", "r") as f:
-#~     data = json.load(f)
-#~     for record in data["records"]:
-#~         print(record.keys())
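The removed CERN_API.parse_xml shows the MARC21 walk this codebase uses: split the payload on <record>, then map datafield tags and subfield codes through a MARC21 table, accumulating the repeatable 700 (author) fields into lists. A runnable sketch of that traversal, with a hypothetical two-entry tag map in place of the full MARC21 table:

# sketch of the MARCXML traversal; MARC21 here is a made-up two-entry map
from bs4 import BeautifulSoup

MARC21 = {"245": {"a": "title"}, "700": {"a": "authors"}}

xml = """
<record>
  <controlfield tag="001">12345</controlfield>
  <datafield tag="245"><subfield code="a">A SCOAP3 paper</subfield></datafield>
  <datafield tag="700"><subfield code="a">Doe, J.</subfield></datafield>
  <datafield tag="700"><subfield code="a">Roe, R.</subfield></datafield>
</record>"""

soup = BeautifulSoup(xml, "lxml")
record = {"authors": []}
record["uid"] = soup.find("controlfield").text
for data in soup.find_all("datafield"):
    tag = data.get("tag")
    if tag in MARC21:
        for sub in data.find_all("subfield"):
            code = sub.get("code")
            if code in MARC21[tag]:
                if tag == "700":   # repeatable field: collect every author
                    record[MARC21[tag][code]].append(sub.text)
                else:
                    record[MARC21[tag][code]] = sub.text
print(record)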
moissonneurs/urls.py  (+3 −7)

...
@@ -13,15 +13,13 @@
 # Available databases :
 ## Pubmed
 ## IsTex,
-## In progress: CERN
+## CERN

 from django.conf.urls import url

 import moissonneurs.pubmed as pubmed
 import moissonneurs.istex as istex
-# TODO
 import moissonneurs.cern as cern

 # TODO
...
@@ -38,8 +36,6 @@ urlpatterns = [ url(r'^pubmed/query$' , pubmed.query )
     , url(r'^istex/query$'      , istex.query )
     , url(r'^istex/save/(\d+)'  , istex.save  )
-    , url(r'^cern/query$'       , cern.query  ) # TODO
-    , url(r'^cern/save/(\d+)'   , cern.save   )
+    , url(r'^scoap3/query$'     , cern.query  )
+    , url(r'^scoap3/save/(\d+)' , cern.save   )
    ]
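With the routes renamed from cern/* to scoap3/*, a quick sanity check that they still resolve is worthwhile; note that the template's JS (next file) still posts to /moissonneurs/cern/..., so the two prefixes need to agree. A sketch using Django's test client, run under the project's settings (the project id 1 is arbitrary):

# sketch: exercise the renamed endpoints with Django's test client
from django.test import Client

c = Client()
print(c.post("/moissonneurs/scoap3/query", {"query": "higgs", "N": 10}).status_code)
print(c.post("/moissonneurs/scoap3/save/1", {"query": "higgs", "N": 10}).status_code)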
templates/pages/projects/project.html  (+140 −25)

...
@@ -61,6 +61,23 @@
     </p>
   </div>
 </div>
+<!-- Modal -->
+<div id="wait" class="modal" style="top:8%;">
+  <div class="jumbotron">
+    <div class="container theme-showcase jumbotron" role="main">
+      <div>
+        <div class="col-md-3"></div>
+        <div class="col-md-6">
+          <h2>Your file has been uploaded!</h2>
+          <h2>Gargantext needs some time to eat it.</h2>
+          <h2>Duration depends on the size of the dish.</h2>
+          <a class="btn btn-primary btn-lg" href="/projects/{{ project.id }}" title="Click and test by yourself">Continue on Gargantext</a>
+        </div>
+        <div class="col-md-3"></div>
+      </div>
+    </div>
+  </div>
+</div>
...
@@ -71,16 +88,7 @@
 <div class="container">
-  <!-- Modal -->
-  <div id="wait" class="modal row col-md-6">
-    <div class="modal-dialog ">
-      <h2>Your file has been uploaded!</h2>
-      <h2>Gargantext needs some time to eat it.</h2>
-      <h2>Duration depends on the size of the dish.</h2>
-      <a class="btn btn-primary btn-lg" href="/projects/{{ project.id }}" title="Click and test by yourself">Continue on Gargantext</a>
-    </div>
-  </div>
 {% if list_corpora %}
 {% for key, corpora in list_corpora.items %}
...
@@ -258,8 +266,8 @@
 <td>
   <div id="pubmedcrawl" style="visibility: hidden;">
     Do you have a file already?
     <input type="radio" id="file_yes" name="file1" onclick="FileOrNotFile(this.value);" class="file1" value="true" checked>Yes</input>
     <input type="radio" id="file_no" name="file1" onclick="FileOrNotFile(this.value);" class="file1" value="false">No</input>
   </div>
 </td>
 </tr>
...
@@ -336,7 +344,7 @@
             $('#addcorpus').modal('hide');
             $("#wait").modal("show");
-        }, 3000);
+        }, 600);
     },
     error: function(result) {
         console.log("in doTheQuery(). Data not found");
...
@@ -391,10 +399,11 @@
         $("#theresults").html('<img width="30px" src="{% static "img/loading-bar.gif" %}"></img>')
         console.log("disabling " + "#" + value.id)
         $("#" + value.id).prop('onclick', null);
         var theType = $("#id_type option:selected").html();
-        if (theType == "Pubmed [XML]") {
+        var SourceTypeId = $("#id_type").val()
+        //alert(SourceTypeId);
+        //PUBMED = 3
+        if (SourceTypeId == "3") {
             $.ajax({
                 // contentType: "application/json",
                 url: window.location.origin + "/moissonneurs/pubmed/query",
...
@@ -434,7 +443,7 @@
         });
     }
-    if (theType == "ISTex") {
+    if (SourceTypeId == "8") {
         console.log(window.location.origin + "moissonneurs/istex/query")
         $.ajax({
             // contentType: "application/json",
...
@@ -470,6 +479,64 @@
         }
     });
 }
+// <option value="9">SCOAP [XML]</option>
+if (SourceTypeId == "9") {
+    $.ajax({
+        // contentType: "application/json",
+        url: window.location.origin + "/moissonneurs/cern/query",
+        data: formData,
+        type: 'POST',
+        beforeSend: function(xhr) {
+            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
+        },
+        success: function(data) {
+            console.log("SUCCESS")
+            console.log("enabling " + "#" + value.id)
+            $("#" + value.id).attr('onclick', 'getGlobalResults(this);');
+            $("#submit_thing").prop('disabled', false)
+            //$("#submit_thing").html("Process a {{ query_size }} sample!")
+            N = data["results_nb"]
+            if (N > 0) {
+                if (N <= {{ query_size }}) {
+                    $("#theresults").html("<i> <b>" + pubmedquery + "</b>: " + N + " publications </i><br>")
+                    $("#submit_thing").html("Download!")
+                    $("#submit_thing").prop('disabled', false)
+                    //$("#submit_thing").attr('onclick', testCERN(query, N));
+                    $("#submit_thing").on("click", function() {
+                        testCERN(pubmedquery, N);
+                        //$("#submit_thing").onclick()
+                    })
+                }
+                //(N > {{query_size}})
+                else {
+                    $("#theresults").html("<i> <b>" + pubmedquery + "</b>: " + N + " publications </i><br>")
+                    $('#submit_thing').prop('disabled', false);
+                    $("#submit_thing").html("Processing a sample file")
+                    $("#submit_thing").on("click", function() {
+                        testCERN(pubmedquery, N);
+                        //$("#submit_thing").onclick()
+                    })
+                }
+            }
+            else {
+                $("#theresults").html("<i> <b>" + pubmedquery + "</b>: No results!.</i><br>")
+                if (data[0] == false)
+                    $("#theresults").html(theType + " connection error!</i><br>")
+                $('#submit_thing').prop('disabled', false);
+            }
+        },
+        error: function(result) {
+            $("#theresults").html(theType + " connection error!</i><br>")
+            $('#submit_thing').prop('disabled', true);
+        }
+    });
+}
...
@@ -508,17 +575,20 @@
 }
 //CSS events for changing the Select element
 function CustomForSelect(selected) {
     // show Radio-Inputs and trigger FileOrNotFile>@upload-file events
-    selected = selected.toLowerCase()
-    var is_pubmed = (selected.indexOf('pubmed') != -1);
-    var is_istex = (selected.indexOf('istex') != -1);
-    if (is_pubmed || is_istex) {
+    selectedId = $("#id_type").val()
+    //alert(selectedId);
+    //by typeID 3 = PUBMED 8 = ISTEX 9 = CERN
+    // if(selected=="pubmed") {
+    if (selectedId == "3" || selectedId == "8" || selectedId == "9") {
         console.log("show the button for: " + selected)
         $("#pubmedcrawl").css("visibility", "visible");
         $("#pubmedcrawl").show();
-        $("#file_yes").click();
+        //$("#file_yes").click();
         $("#submit_thing").html("Process this!")
     }
     // hide Radio-Inputs and trigger @upload-file events
...
@@ -557,6 +627,51 @@
     });
     return false;
 }
+
+function testCERN(query, N) {
+    //alert("CERN!")
+    console.log("In testCern")
+    if (!query || query == "") return;
+    //var origQuery = query
+    var data = {"query": query, "N": N};
+    var projectid = window.location.href.split("projects")[1].replace(/\//g, '') //replace all the slashes
+    console.log(data)
+    $.ajax({
+        dataType: 'json',
+        url: window.location.origin + "/moissonneurs/cern/save/" + projectid,
+        data: data,
+        type: 'POST',
+        beforeSend: function(xhr) {
+            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
+        },
+        success: function(data) {
+            console.log("ajax_success: in testCERN()")
+            console.log(data)
+            alert("OK")
+            setTimeout(function() {
+                alert("TimeOut!")
+                $('#addcorpus').modal('hide')
+                $("#wait").modal("show")
+                //setTimeout(, 300)
+                //location.reload();
+            }, 600);
+        },
+        error: function(data) {
+            console.log(data)
+            setTimeout(function() {
+                $('#addcorpus').modal('hide')
+                $("#wait").modal("show")
+                //setTimeout(, 300)
+                //location.reload();
+            }, 600);
+        },
+    });
+}

 function testISTEX(query, N) {
     console.log("in testISTEX:");
...
@@ -584,10 +699,10 @@
             $("#wait").modal("show");
             //location.reload();
-        }, 3000);
+        }, 600);
     },
     error: function(result) {
         console.log("in testISTEX(). Data not found");
     }
 });
...
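Taken together, the template's JS wires a two-step handshake: the type-9 (SCOAP) branch posts the query to the moissonneur's query endpoint to learn results_nb, and testCERN then posts query and N to save, which downloads the records and schedules indexing. The same flow sketched in Python with requests (hypothetical local host, CSRF handling omitted):

# sketch of the page's two-step flow against a locally running instance
import requests

base = "http://localhost:8000/moissonneurs"
r = requests.post(base + "/cern/query", data={"query": "higgs", "N": 0})
nb = r.json()["results_nb"]
if 0 < nb <= 1000:   # 1000 stands in for QUERY_SIZE_N_MAX
    requests.post(base + "/cern/save/1", data={"query": "higgs", "N": nb})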