Commit 2b36c5d0 authored by PkSM3's avatar PkSM3

[UPDATE] pubmed|istex removal (for the moment)

parent fc089d47
This diff is collapsed.
......@@ -73,75 +73,75 @@ def doTheQuery(request , project_id):
alist = ["hola","mundo"]
if request.method == "POST":
query = request.POST["query"]
name = request.POST["string"]
instancia = MedlineFetcher()
thequeries = json.loads(query)
urlreqs = []
for yearquery in thequeries:
urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
alist = ["tudo fixe" , "tudo bem"]
"""
urlreqs: List of urls to query.
- Then, to each url in urlreqs you do:
eFetchResult = urlopen(url)
eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
"""
thefile = "how we do this here?"
resource_type = ResourceType.objects.get(name="pubmed" )
parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id
corpus = Node(
user=request.user,
parent=parent,
type=node_type,
name=name,
)
corpus.save()
tasks = MedlineFetcher()
for i in range(8):
t = threading.Thread(target=tasks.worker2) #thing to do
t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
t.start()
for url in urlreqs:
filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.datetime.now().isoformat()))
tasks.q.put( [url , filename]) #put a task in th queue
tasks.q.join() # wait until everything is finished
dwnldsOK = 0
for filename in tasks.firstResults:
if filename!=False:
corpus.add_resource( user=request.user, type=resource_type, file=filename )
dwnldsOK+=1
# query = request.POST["query"]
# name = request.POST["string"]
# instancia = MedlineFetcher()
# thequeries = json.loads(query)
# urlreqs = []
# for yearquery in thequeries:
# urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
# alist = ["tudo fixe" , "tudo bem"]
# """
# urlreqs: List of urls to query.
# - Then, to each url in urlreqs you do:
# eFetchResult = urlopen(url)
# eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
# """
# thefile = "how we do this here?"
# resource_type = ResourceType.objects.get(name="pubmed" )
# parent = Node.objects.get(id=project_id)
# node_type = NodeType.objects.get(name='Corpus')
# type_id = NodeType.objects.get(name='Document').id
# user_id = User.objects.get( username=request.user ).id
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# name=name,
# )
# corpus.save()
# tasks = MedlineFetcher()
# for i in range(8):
# t = threading.Thread(target=tasks.worker2) #thing to do
# t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
# t.start()
# for url in urlreqs:
# filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.datetime.now().isoformat()))
# tasks.q.put( [url , filename]) #put a task in th queue
# tasks.q.join() # wait until everything is finished
# dwnldsOK = 0
# for filename in tasks.firstResults:
# if filename!=False:
# corpus.add_resource( user=request.user, type=resource_type, file=filename )
# dwnldsOK+=1
if dwnldsOK == 0: return JsonHttpResponse(["fail"])
# if dwnldsOK == 0: return JsonHttpResponse(["fail"])
# do the WorkFlow
try:
if DEBUG is True:
# corpus.workflow() # old times...
corpus.workflow__MOV()
# corpus.write_everything_to_DB()
else:
# corpus.workflow.apply_async((), countdown=3)
corpus.workflow__MOV().apply_async((), countdown=3) # synchronous! because is faaast
# corpus.write_everything_to_DB.apply_async((), countdown=3) # asynchronous
# # do the WorkFlow
# try:
# if DEBUG is True:
# # corpus.workflow() # old times...
# corpus.workflow__MOV()
# # corpus.write_everything_to_DB()
# else:
# # corpus.workflow.apply_async((), countdown=3)
# corpus.workflow__MOV().apply_async((), countdown=3) # synchronous! because is faaast
# # corpus.write_everything_to_DB.apply_async((), countdown=3) # asynchronous
return JsonHttpResponse(["workflow","finished"])
except Exception as error:
print(error)
# return JsonHttpResponse(["workflow","finished"])
# except Exception as error:
# print(error)
return JsonHttpResponse(["workflow","finished","outside the try-except"])
return JsonHttpResponse(["out of service for the moment"])
data = alist
return JsonHttpResponse(data)
......@@ -164,59 +164,59 @@ def testISTEX(request , project_id):
print(query_string , query , N)
urlreqs = []
pagesize = 50
tasks = MedlineFetcher()
chunks = list(tasks.chunks(range(N), pagesize))
for k in chunks:
if (k[0]+pagesize)>N: pagesize = N-k[0]
urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
print(urlreqs)
urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
print(urlreqs)
resource_type = ResourceType.objects.get(name="istext" )
parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id
corpus = Node(
user=request.user,
parent=parent,
type=node_type,
name=query,
)
corpus.save()
# configuring your queue with the event
for i in range(8):
t = threading.Thread(target=tasks.worker2) #thing to do
t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
t.start()
for url in urlreqs:
filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
tasks.q.put( [url , filename]) #put a task in th queue
tasks.q.join() # wait until everything is finished
for filename in tasks.firstResults:
corpus.add_resource( user=request.user, type=resource_type, file=filename )
corpus.save()
print("DEBUG:",DEBUG)
# do the WorkFlow
try:
if DEBUG is True:
corpus.workflow()
else:
corpus.workflow.apply_async((), countdown=3)
return JsonHttpResponse(["workflow","finished"])
except Exception as error:
print(error)
# urlreqs = []
# pagesize = 50
# tasks = MedlineFetcher()
# chunks = list(tasks.chunks(range(N), pagesize))
# for k in chunks:
# if (k[0]+pagesize)>N: pagesize = N-k[0]
# urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
# print(urlreqs)
# urlreqs = ["http://localhost/374255" , "http://localhost/374278" ]
# print(urlreqs)
# resource_type = ResourceType.objects.get(name="istext" )
# parent = Node.objects.get(id=project_id)
# node_type = NodeType.objects.get(name='Corpus')
# type_id = NodeType.objects.get(name='Document').id
# user_id = User.objects.get( username=request.user ).id
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# name=query,
# )
# corpus.save()
# # configuring your queue with the event
# for i in range(8):
# t = threading.Thread(target=tasks.worker2) #thing to do
# t.daemon = True # thread dies when main thread (only non-daemon thread) exits.
# t.start()
# for url in urlreqs:
# filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
# tasks.q.put( [url , filename]) #put a task in th queue
# tasks.q.join() # wait until everything is finished
# for filename in tasks.firstResults:
# corpus.add_resource( user=request.user, type=resource_type, file=filename )
# corpus.save()
# print("DEBUG:",DEBUG)
# # do the WorkFlow
# try:
# if DEBUG is True:
# corpus.workflow()
# else:
# corpus.workflow.apply_async((), countdown=3)
# return JsonHttpResponse(["workflow","finished"])
# except Exception as error:
# print(error)
data = [query_string,query,N]
return JsonHttpResponse(data)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment