Commit a8dff456 authored by Administrator

Merge branch 'unstable' into testing

parents 5872b406 f51ea0e3
...@@ -66,6 +66,7 @@ INSTALLED_APPS = ( ...@@ -66,6 +66,7 @@ INSTALLED_APPS = (
'cte_tree', 'cte_tree',
'node', 'node',
'ngram', 'ngram',
'scrap_pubmed',
'django_hstore', 'django_hstore',
'djcelery', 'djcelery',
'aldjemy', 'aldjemy',
......
...@@ -6,6 +6,7 @@ from django.contrib.auth.views import login ...@@ -6,6 +6,7 @@ from django.contrib.auth.views import login
from gargantext_web import views from gargantext_web import views
import gargantext_web.api import gargantext_web.api
import scrap_pubmed.views as pubmedscrapper
admin.autodiscover() admin.autodiscover()
...@@ -67,7 +68,12 @@ urlpatterns = patterns('', ...@@ -67,7 +68,12 @@ urlpatterns = patterns('',
url(r'^ngrams$', views.ngrams), url(r'^ngrams$', views.ngrams),
url(r'^nodeinfo/(\d+)$', views.nodeinfo), url(r'^nodeinfo/(\d+)$', views.nodeinfo),
url(r'^tests/mvc$', views.tests_mvc), url(r'^tests/mvc$', views.tests_mvc),
url(r'^tests/mvc-listdocuments$', views.tests_mvc_listdocuments) url(r'^tests/mvc-listdocuments$', views.tests_mvc_listdocuments),
url(r'^tests/pubmedquery$', pubmedscrapper.getGlobalStats),
url(r'^tests/project/(\d+)/pubmedquery/go$', pubmedscrapper.doTheQuery),
url(r'^tests/project/(\d+)/ISTEXquery/go$', pubmedscrapper.testISTEX)
) )
......
...@@ -260,6 +260,8 @@ def project(request, project_id): ...@@ -260,6 +260,8 @@ def project(request, project_id):
cooclists = ""#.children.filter(type=type_cooclist) cooclists = ""#.children.filter(type=type_cooclist)
for corpus in corpora: for corpus in corpora:
# print("corpus", corpus.pk , corpus.name , corpus.type_id)
docs_count = corpus.children.count() docs_count = corpus.children.count()
docs_total += docs_count docs_total += docs_count
...@@ -267,15 +269,30 @@ def project(request, project_id): ...@@ -267,15 +269,30 @@ def project(request, project_id):
corpus_view['id'] = corpus.pk corpus_view['id'] = corpus.pk
corpus_view['name'] = corpus.name corpus_view['name'] = corpus.name
corpus_view['count'] = corpus.children.count() corpus_view['count'] = corpus.children.count()
for node_resource in Node_Resource.objects.filter(node=corpus): #just get first element of the corpora and get his type.
donut_part[node_resource.resource.type] += docs_count
list_corpora[node_resource.resource.type.name].append(corpus_view) resource_corpus = Node_Resource.objects.filter(node=corpus)
if len(resource_corpus)>0:
# print(Node_Resource.objects.filter(node=corpus).all())
corpus_type = Node_Resource.objects.filter(node=corpus)[0].resource.type
list_corpora[corpus_type].append(corpus_view)
donut_part[corpus_type] += docs_count
else: print(" Node_Resource = this.corpus(",corpus.pk,") ... nothing, why?")
## For avoiding to list repeated elements, like when u use the dynamic query (per each xml, 1)
# for node_resource in Node_Resource.objects.filter(node=corpus):
# print( "node_resource.id:",node_resource.id , node_resource.resource.file )
# donut_part[node_resource.resource.type] += docs_count
# list_corpora[node_resource.resource.type.name].append(corpus_view)
# print(node_resource.resource.type.name)
list_corpora = dict(list_corpora) list_corpora = dict(list_corpora)
if docs_total == 0 or docs_total is None: if docs_total == 0 or docs_total is None:
docs_total = 1 docs_total = 1
# The donut will show: percentage by
donut = [ {'source': key, donut = [ {'source': key,
'count': donut_part[key] , 'count': donut_part[key] ,
'part' : round(donut_part[key] * 100 / docs_total) } \ 'part' : round(donut_part[key] * 100 / docs_total) } \
...@@ -283,23 +300,21 @@ def project(request, project_id): ...@@ -283,23 +300,21 @@ def project(request, project_id):
if request.method == 'POST': if request.method == 'POST':
print("original file:")
print(request.FILES)
form = CustomForm(request.POST, request.FILES) form = CustomForm(request.POST, request.FILES)
if form.is_valid(): if form.is_valid():
name = form.cleaned_data['name'] name = form.cleaned_data['name']
thefile = form.cleaned_data['file'] thefile = form.cleaned_data['file']
resource_type = ResourceType.objects.get(id=str( form.cleaned_data['type'] )) print(request.POST['type'])
print(form.cleaned_data['type'])
resource_type = ResourceType.objects.get(name=str( form.cleaned_data['type'] ))
print("-------------") print("-------------")
print(name,"|",resource_type,"|",thefile) print(name,"|",resource_type,"|",thefile)
print("-------------") print("-------------")
print("new file:")
print(thefile)
try: try:
parent = Node.objects.get(id=project_id) parent = Node.objects.get(id=project_id)
...@@ -328,8 +343,6 @@ def project(request, project_id): ...@@ -328,8 +343,6 @@ def project(request, project_id):
corpus.save() corpus.save()
print(request.user, resource_type , thefile )
corpus.add_resource( corpus.add_resource(
user=request.user, user=request.user,
type=resource_type, type=resource_type,
...@@ -373,79 +386,6 @@ def project(request, project_id): ...@@ -373,79 +386,6 @@ def project(request, project_id):
else: else:
form = CustomForm() form = CustomForm()
# if request.method == 'POST':
# #form = CorpusForm(request.POST, request.FILES)
# #print(str(request.POST))
# name = str(request.POST['name'])
# try:
# resource_type = ResourceType.objects.get(id=str(request.POST['type']))
# except Exception as error:
# print(error)
# resource_type = None
# try:
# file = request.FILES['file']
# except Exception as error:
# print(error)
# file = None
# #if name != "" and resource_type is not None and file is not None:
# try:
# parent = Node.objects.get(id=project_id)
# node_type = NodeType.objects.get(name='Corpus')
# if resource_type.name == "europress_french":
# language = Language.objects.get(iso2='fr')
# elif resource_type.name == "europress_english":
# language = Language.objects.get(iso2='en')
# try:
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# language=language,
# name=name,
# )
# except:
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# name=name,
# )
# corpus.save()
# print(request.user, resource_type , file )
# print(corpus.language)
# corpus.add_resource(
# user=request.user,
# type=resource_type,
# file=file
# )
# try:
# #corpus.parse_and_extract_ngrams()
# #corpus.parse_and_extract_ngrams.apply_async((), countdown=3)
# if DEBUG is True:
# corpus.workflow()
# else:
# corpus.workflow.apply_async((), countdown=3)
# except Exception as error:
# print(error)
# return HttpResponseRedirect('/project/' + str(project_id))
# except Exception as error:
# print('ee', error)
# form = CorpusForm(request=request)
# formResource = ResourceForm()
# else:
# form = CorpusForm(request=request)
# formResource = ResourceForm()
return render(request, 'project.html', { return render(request, 'project.html', {
'form' : form, 'form' : form,
...@@ -874,9 +814,12 @@ def node_link(request, corpus_id): ...@@ -874,9 +814,12 @@ def node_link(request, corpus_id):
''' '''
Create the HttpResponse object with the node_link dataset. Create the HttpResponse object with the node_link dataset.
''' '''
import time
print("In node_link() START") print("In node_link() START")
start = time.time()
data = get_cooc(request=request, corpus_id=corpus_id, type="node_link") data = get_cooc(request=request, corpus_id=corpus_id, type="node_link")
end = time.time()
print ("LOG::TIME: get_cooc() [s]",(end - start))
print("In node_link() END") print("In node_link() END")
return JsonHttpResponse(data) return JsonHttpResponse(data)
......
...@@ -98,13 +98,10 @@ from django import forms ...@@ -98,13 +98,10 @@ from django import forms
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
class CustomForm(forms.Form): class CustomForm(forms.Form):
name = forms.CharField( label='Name', max_length=199 , required=True) name = forms.CharField( label='Name', max_length=199 , widget=forms.TextInput(attrs={ 'required': 'true' }))
parsing_options = ResourceType.objects.all().values_list('id', 'name') type = ModelChoiceField( ResourceType.objects.all() , widget=forms.Select(attrs={'onchange':'CustomForSelect( $("option:selected", this).text() );'}) )
type = forms.IntegerField( widget=forms.Select( choices= parsing_options) , required=True )
file = forms.FileField() file = forms.FileField()
# Description: clean_file() # Description: clean_file()
""" """
* file_.content_type - Example: ['application/pdf', 'image/jpeg'] * file_.content_type - Example: ['application/pdf', 'image/jpeg']
...@@ -120,12 +117,14 @@ class CustomForm(forms.Form): ...@@ -120,12 +117,14 @@ class CustomForm(forms.Form):
""" """
def clean_file(self): def clean_file(self):
file_ = self.cleaned_data.get('file') file_ = self.cleaned_data.get('file')
#Filename length from datetime import datetime
if len(file_.name)>30: file_.name = str(datetime.now().microsecond)
from datetime import datetime # #Filename length
file_.name = str(datetime.now().microsecond) # if len(file_.name)>30:
# raise forms.ValidationError(_('Come on dude, name too long. Now is:'+file_.name)) # from datetime import datetime
#File size # file_.name = str(datetime.now().microsecond)
# # raise forms.ValidationError(_('Come on dude, name too long. Now is:'+file_.name))
# #File size
if len(file_)>104857600: if len(file_)>104857600:
raise forms.ValidationError(_('File to heavy! (<100MB).')) raise forms.ValidationError(_('File to heavy! (<100MB).'))
## File type: ## File type:
......
...@@ -163,6 +163,7 @@ class Node(CTENode): ...@@ -163,6 +163,7 @@ class Node(CTENode):
for node_resource in self.node_resource.filter(parsed=False): for node_resource in self.node_resource.filter(parsed=False):
resource = node_resource.resource resource = node_resource.resource
parser = defaultdict(lambda:FileParser.FileParser, { parser = defaultdict(lambda:FileParser.FileParser, {
'istext' : ISText,
'pubmed' : PubmedFileParser, 'pubmed' : PubmedFileParser,
'isi' : IsiFileParser, 'isi' : IsiFileParser,
'ris' : RisFileParser, 'ris' : RisFileParser,
...@@ -171,6 +172,7 @@ class Node(CTENode): ...@@ -171,6 +172,7 @@ class Node(CTENode):
'europress_english' : EuropressFileParser, 'europress_english' : EuropressFileParser,
})[resource.type.name]() })[resource.type.name]()
metadata_list += parser.parse(str(resource.file)) metadata_list += parser.parse(str(resource.file))
# print(parser.parse(str(resource.file)))
# retrieve info from the database # retrieve info from the database
type_id = NodeType.objects.get(name='Document').id type_id = NodeType.objects.get(name='Document').id
langages_cache = LanguagesCache() langages_cache = LanguagesCache()
...@@ -183,6 +185,8 @@ class Node(CTENode): ...@@ -183,6 +185,8 @@ class Node(CTENode):
language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None, language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None,
if isinstance(language, tuple): if isinstance(language, tuple):
language = language[0] language = language[0]
# print("metadata_values:")
# print("\t",metadata_values,"\n- - - - - - - - - - - - ")
Node( Node(
user_id = user_id, user_id = user_id,
type_id = type_id, type_id = type_id,
...@@ -191,7 +195,6 @@ class Node(CTENode): ...@@ -191,7 +195,6 @@ class Node(CTENode):
language_id = language.id if language else None, language_id = language.id if language else None,
metadata = metadata_values metadata = metadata_values
).save() ).save()
# make metadata filterable # make metadata filterable
self.children.all().make_metadata_filterable() self.children.all().make_metadata_filterable()
...@@ -236,14 +239,34 @@ class Node(CTENode): ...@@ -236,14 +239,34 @@ class Node(CTENode):
@current_app.task(filter=task_method) @current_app.task(filter=task_method)
def workflow(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False): def workflow(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
print("In workflow() START") import time
print("LOG::TIME: In workflow() parse_resources()")
start = time.time()
self.metadata['Processing'] = 1 self.metadata['Processing'] = 1
self.save() self.save()
self.parse_resources() self.parse_resources()
end = time.time()
print ("LOG::TIME: parse_resources() [s]",(end - start))
print("LOG::TIME: In workflow() / parse_resources()")
start = time.time()
print("LOG::TIME: In workflow() extract_ngrams()")
type_document = NodeType.objects.get(name='Document') type_document = NodeType.objects.get(name='Document')
self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',]) self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
end = time.time()
print ("LOG::TIME: ",(end - start))
print ("LOG::TIME: extract_ngrams() [s]",(end - start))
print("LOG::TIME: In workflow() / extract_ngrams()")
start = time.time()
print("In workflow() do_tfidf()")
from analysis.functions import do_tfidf from analysis.functions import do_tfidf
do_tfidf(self) do_tfidf(self)
end = time.time()
print ("LOG::TIME: do_tfidf() [s]",(end - start))
print("LOG::TIME: In workflow() / do_tfidf()")
print("In workflow() END") print("In workflow() END")
self.metadata['Processing'] = 0 self.metadata['Processing'] = 0
self.save() self.save()
......
from django.db import transaction
from lxml import etree
from .FileParser import FileParser
from ..NgramsExtractors import *
from datetime import datetime
from io import BytesIO
import json
class ISText(FileParser):
    """Parser for ISTEX JSON exports.

    The input file is a JSON document whose top-level ``"hits"`` key holds
    one object per retrieved document.  ``_parse`` maps each hit to a flat
    metadata dict understood by the rest of the parsing pipeline.
    """

    def _parse(self, thefile):
        """Parse the ISTEX JSON file at path ``thefile``.

        Returns a list of metadata dicts (one per document in ``hits``).
        """
        with open(thefile, "r") as json_data:
            data = json.load(json_data)
        json_docs = data["hits"]

        # Mapping: our metadata key -> key inside an ISTEX "hit" object.
        metadata_path = {
            "id"               : "id",
            "source"           : "corpusName",
            "title"            : "title",
            "genre"            : "genre",
            "doi"              : "doi",
            "host"             : "host",
            "publication_date" : "pubdate",
            "authorsRAW"       : "author",
            "keywords"         : "keywords",
        }

        metadata_list = []
        for json_doc in json_docs:
            # BUGFIX: build a fresh dict per document.  Previously a single
            # dict was created once before the loop, so every appended entry
            # aliased the same object and keys leaked between documents.
            metadata = {}
            for key, path in metadata_path.items():
                try:
                    metadata[key] = json_doc[path]
                except KeyError:
                    pass

            # "doi" arrives as a list; keep only its first value.
            if "doi" in metadata:
                metadata["doi"] = metadata["doi"][0]

            # Flatten keyword objects into one comma-separated string.
            if "keywords" in metadata:
                keywords = [keyw["value"] for keyw in metadata["keywords"]]
                metadata["keywords"] = ", ".join(keywords)

            # "host" describes the hosting journal/book: borrow its genre,
            # publication date and title where the document lacks them.
            if "host" in metadata:
                if "genre" in metadata["host"] and len(metadata["host"]["genre"]) > 0:
                    if "genre" in metadata and len(metadata["genre"]) == 0:
                        metadata["genre"] = metadata["host"]["genre"]
                if "pubdate" in metadata["host"]:
                    # keep both the document date and the host date as a pair
                    onebuffer = metadata["publication_date"]
                    metadata["publication_date"] = [onebuffer, metadata["host"]["pubdate"]]
                if "title" in metadata["host"]:
                    metadata["journal"] = metadata["host"]["title"]

            # Join author names into a single display string.
            if "authorsRAW" in metadata:
                names = [author["name"] for author in metadata["authorsRAW"]]
                metadata["authors"] = ", ".join(names)

            # Drop raw/empty helper fields.
            if "host" in metadata:
                metadata.pop("host")
            if "genre" in metadata and len(metadata["genre"]) == 0:
                metadata.pop("genre")

            # Decide on a single publication date.
            if "publication_date" in metadata and isinstance(metadata["publication_date"], list):
                if len(metadata["publication_date"]) > 1:
                    d1 = metadata["publication_date"][0]
                    d2 = metadata["publication_date"][1]
                    if len(d1) == len(d2):
                        # same precision: prefer the host date
                        metadata["publication_date"] = d2
                    else:
                        # d2 is a compact YYYY[MM[DD]] string: reformat as
                        # "YYYY[-MM[-DD]]".
                        fulldate = d2[:4]
                        if len(d2) > 4:
                            fulldate += "-" + d2[4:6]
                        if len(d2) > 6:
                            fulldate += "-" + d2[6:8]
                        metadata["publication_date"] = fulldate
                else:
                    if "copyrightdate" in json_doc:
                        metadata["publication_date"] = json_doc["copyrightdate"]
            else:
                if "copyrightdate" in json_doc:
                    metadata["publication_date"] = json_doc["copyrightdate"]

            metadata_list.append(metadata)

        return metadata_list
...@@ -2,13 +2,19 @@ from django.db import transaction ...@@ -2,13 +2,19 @@ from django.db import transaction
from lxml import etree from lxml import etree
from .FileParser import FileParser from .FileParser import FileParser
from ..NgramsExtractors import * from ..NgramsExtractors import *
from datetime import datetime
from io import BytesIO
class PubmedFileParser(FileParser): class PubmedFileParser(FileParser):
def _parse(self, file): def _parse(self, file):
# open the file as XML # open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True) xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
xml = etree.parse(file, parser=xml_parser)
xml = ""
if type(file)==bytes: xml = etree.parse( BytesIO(file) , parser=xml_parser)
else: xml = etree.parse(file, parser=xml_parser)
xml_articles = xml.findall('PubmedArticle') xml_articles = xml.findall('PubmedArticle')
# initialize the list of metadata # initialize the list of metadata
metadata_list = [] metadata_list = []
...@@ -19,9 +25,13 @@ class PubmedFileParser(FileParser): ...@@ -19,9 +25,13 @@ class PubmedFileParser(FileParser):
metadata_path = { metadata_path = {
"journal" : 'MedlineCitation/Article/Journal/Title', "journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle', "title" : 'MedlineCitation/Article/ArticleTitle',
"title2" : 'MedlineCitation/Article/VernacularTitle',
"language_iso3" : 'MedlineCitation/Article/Language', "language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]', "doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText', "realdate_full_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
"realdate_year_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
"realdate_month_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Month',
"realdate_day_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Day',
"publication_year" : 'MedlineCitation/DateCreated/Year', "publication_year" : 'MedlineCitation/DateCreated/Year',
"publication_month" : 'MedlineCitation/DateCreated/Month', "publication_month" : 'MedlineCitation/DateCreated/Month',
"publication_day" : 'MedlineCitation/DateCreated/Day', "publication_day" : 'MedlineCitation/DateCreated/Day',
...@@ -30,6 +40,7 @@ class PubmedFileParser(FileParser): ...@@ -30,6 +40,7 @@ class PubmedFileParser(FileParser):
for key, path in metadata_path.items(): for key, path in metadata_path.items():
try: try:
xml_node = xml_article.find(path) xml_node = xml_article.find(path)
# Authors tag
if key == 'authors': if key == 'authors':
metadata[key] = ', '.join([ metadata[key] = ', '.join([
xml_author.find('ForeName').text + ' ' + xml_author.find('LastName').text xml_author.find('ForeName').text + ' ' + xml_author.find('LastName').text
...@@ -37,8 +48,53 @@ class PubmedFileParser(FileParser): ...@@ -37,8 +48,53 @@ class PubmedFileParser(FileParser):
]) ])
else: else:
metadata[key] = xml_node.text metadata[key] = xml_node.text
except: except:
pass pass
#Title-Decision
Title=""
if not metadata["title"] or metadata["title"]=="":
if "title2" in metadata:
metadata["title"] = metadata["title2"]
else: metadata["title"] = ""
# Date-Decision
# forge.iscpif.fr/issues/1418
RealDate = ""
if "realdate_full_" in metadata:
RealDate = metadata["realdate_full_"]
else:
if "realdate_year_" in metadata: RealDate+=metadata["realdate_year_"]
if "realdate_month_" in metadata: RealDate+=" "+metadata["realdate_month_"]
if "realdate_day_" in metadata: RealDate+=" "+metadata["realdate_day_"]
metadata["realdate_full_"] = RealDate
RealDate = RealDate.split("-")[0]
PubmedDate = ""
if "publication_year" in metadata: PubmedDate+=metadata["publication_year"]
if "publication_month" in metadata: PubmedDate+=" "+metadata["publication_month"]
if "publication_day" in metadata: PubmedDate+=" "+metadata["publication_day"]
Decision=""
if len(RealDate)>4:
if len(RealDate)>8:
try: Decision = datetime.strptime(RealDate, '%Y %b %d').date()
except: Decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
else:
try: Decision = datetime.strptime(RealDate, '%Y %b').date()
except: Decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
else: Decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
if "publication_year" in metadata: metadata["publication_year"] = str(Decision.year)
if "publication_month" in metadata: metadata["publication_month"] = str(Decision.month)
if "publication_day" in metadata: metadata["publication_day"] = str(Decision.day)
if "realdate_year_" in metadata: metadata.pop("realdate_year_")
if "realdate_month_" in metadata: metadata.pop("realdate_month_")
if "realdate_day_" in metadata: metadata.pop("realdate_day_")
if "title2" in metadata: metadata.pop("title2")
# print(metadata)
metadata_list.append(metadata) metadata_list.append(metadata)
# return the list of metadata # return the list of metadata
return metadata_list return metadata_list
...@@ -2,3 +2,4 @@ from parsing.FileParsers.RisFileParser import RisFileParser ...@@ -2,3 +2,4 @@ from parsing.FileParsers.RisFileParser import RisFileParser
from parsing.FileParsers.IsiFileParser import IsiFileParser from parsing.FileParsers.IsiFileParser import IsiFileParser
from parsing.FileParsers.PubmedFileParser import PubmedFileParser from parsing.FileParsers.PubmedFileParser import PubmedFileParser
from parsing.FileParsers.EuropressFileParser import EuropressFileParser from parsing.FileParsers.EuropressFileParser import EuropressFileParser
from parsing.FileParsers.ISText import ISText
# ****************************
# ***** Medline Fetcher *****
# ****************************
# MEDLINE USER REQUIREMENT : Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time weekdays
import sys
if sys.version_info >= (3, 0): from urllib.request import urlopen
else: from urllib import urlopen
import os
import time
# import libxml2
from lxml import etree
from datetime import datetime
from django.core.files import File
import threading
from queue import Queue
import time
class MedlineFetcher:
    """Small threaded client for the NCBI Entrez (PubMed) E-utilities.

    Workflow: per-year result counts are obtained via `esearch` (with
    usehistory=y), `efetch` URLs are then built from the returned
    queryKey/webEnv pair, and a pool of daemon threads downloads the XML
    responses to files.  Worker results accumulate in `self.firstResults`.
    """

    def __init__(self):
        # number of worker threads spawned by serialFetcher()
        self.queue_size = 8
        # shared work queue; items are query strings (worker) or
        # [url, filename] pairs (worker2)
        self.q = Queue()
        # results appended by the worker threads (completion order, not input order)
        self.firstResults = []
        self.lock = threading.Lock() # lock to serialize console output
        self.pubMedEutilsURL = 'http://www.ncbi.nlm.nih.gov/entrez/eutils'
        self.pubMedDB = 'Pubmed'
        self.reportType = 'medline'

    # Return the globalResults!:
    # - count =
    # - queryKey =
    # - webEnv =
    def medlineEsearch(self , query):
        """Run an esearch for `query`; return a dict with keys
        "query", "count", "queryKey" and "webEnv"."""
        # print ("MedlineFetcher::medlineEsearch :")
        "Get number of results for query 'query' in variable 'count'"
        "Get also 'queryKey' and 'webEnv', which are used by function 'medlineEfetch'"

        origQuery = query
        # URL-encode spaces only; the rest of the query is passed as-is
        query = query.replace(' ', '%20')

        eSearch = '%s/esearch.fcgi?db=%s&retmax=1&usehistory=y&term=%s' %(self.pubMedEutilsURL, self.pubMedDB, query)

        eSearchResult = urlopen(eSearch)

        data = eSearchResult.read()
        root = etree.XML(data)

        # pick Count / QueryKey / WebEnv out of the eSearchResult XML
        findcount = etree.XPath("/eSearchResult/Count/text()")
        count = findcount(root)[0]

        findquerykey = etree.XPath("/eSearchResult/QueryKey/text()")
        queryKey = findquerykey(root)[0]

        findwebenv = etree.XPath("/eSearchResult/WebEnv/text()")
        webEnv = findwebenv(root)[0]

        values = { "query":origQuery , "count": int(str(count)), "queryKey": queryKey , "webEnv":webEnv }
        return values

    # RETMAX:
    # Total number of UIDs from the retrieved set to be shown in the XML output (default=20)
    # maximum of 100,000 records
    def medlineEfetchRAW( self , fullquery):
        """Build (but do not fetch) the efetch URL for one prepared query dict
        as produced by serialFetcher()."""

        query = fullquery["string"]
        retmax = fullquery["retmax"]
        count = fullquery["count"]
        queryKey = fullquery["queryKey"]
        webEnv = fullquery["webEnv"]

        "Fetch medline result for query 'query', saving results to file every 'retmax' articles"
        # NOTE(review): queryNoSpace is currently unused
        queryNoSpace = query.replace(' ', '') # No space in directory and file names, avoids stupid errors

        print ("LOG::TIME: ",'medlineEfetchRAW :Query "' , query , '"\t:\t' , count , ' results')

        retstart = 0
        eFetch = '%s/efetch.fcgi?email=youremail@example.org&rettype=%s&retmode=xml&retstart=%s&retmax=%s&db=%s&query_key=%s&WebEnv=%s' %(self.pubMedEutilsURL, self.reportType, retstart, retmax, self.pubMedDB, queryKey, webEnv)
        return eFetch

    def ensure_dir(self , f):
        """Create the directory part of path `f` if it does not exist yet."""
        d = os.path.dirname(f)
        if not os.path.exists(d):
            os.makedirs(d)

    # generic!
    def downloadFile(self, item):
        """Download `item[0]` (url) into `item[1]` (filename); return the filename."""
        url = item[0]
        filename = item[1]
        print("\tin downloadFile:")
        print(url,filename)
        data = urlopen(url)
        f = open(filename, 'w')
        # django File wrapper around the plain file object
        myfile = File(f)
        myfile.write( data.read().decode('utf-8') )
        myfile.close()
        f.close()
        with self.lock:
            print(threading.current_thread().name, filename+" OK")
        return filename

    # generic!
    def do_work(self,item):
        """Esearch one query string; used by `worker`."""
        # time.sleep(1) # pretend to do some lengthy work.
        returnvalue = self.medlineEsearch(item)
        with self.lock:
            print(threading.current_thread().name, item)
        return returnvalue

    # The worker thread pulls an item from the queue and processes it
    def worker(self):
        # daemon loop: one esearch per queued query string
        while True:
            item = self.q.get()
            self.firstResults.append(self.do_work(item))
            self.q.task_done()

    def worker2(self):
        # daemon loop: download each queued [url, filename] pair
        while True:
            item = self.q.get()
            self.firstResults.append(self.downloadFile(item))
            self.q.task_done()

    def chunks(self , l , n):
        """Yield successive slices of `l` of length `n` (last may be shorter)."""
        print("chunks:")
        for i in range(0, len(l), n):
            yield l[i:i+n]

    # GLOBALLIMIT:
    # I will retrieve this exact amount of publications.
    # The publications per year i'll retrieve per year will be = (k/N)*GlobalLimit <- i'll use this as RETMAX
    # - k : Number of publications of x year (according to pubmed)
    # - N : Sum of every k belonging to {X} (total number of pubs according to pubmed)
    # - GlobalLimit : Number of publications i want.
    def serialFetcher(self , yearsNumber , query, globalLimit):
        """Esearch `query` once per year over the last `yearsNumber` years and
        return a list of query dicts with retmax spread proportionally so the
        total fetched is about `globalLimit` publications."""

        # Create the queue and thread pool.
        for i in range(self.queue_size):
            t = threading.Thread(target=self.worker)
            t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
            t.start()
        start = time.perf_counter()
        N = 0
        print ("MedlineFetcher::serialFetcher :")
        thequeries = []
        globalresults = []
        for i in range(yearsNumber):
            # NOTE(review): base year is hard-coded to 2015 — confirm/parameterize
            year = str(2015 - i)
            print ('YEAR ' + year)
            print ('---------\n')
            # "[dp]" restricts the term to the publication-date field
            pubmedquery = str(year) + '[dp] '+query
            self.q.put( pubmedquery ) #put task in the queue

        self.q.join()
        print('time:',time.perf_counter() - start)

        for globalresults in self.firstResults:
            # globalresults = self.medlineEsearch(pubmedquery)
            if globalresults["count"]>0:
                N+=globalresults["count"]
                querymetadata = {
                    "string": globalresults["query"] ,
                    "count": globalresults["count"] ,
                    "queryKey":globalresults["queryKey"] ,
                    "webEnv":globalresults["webEnv"] ,
                    "retmax":0
                }
                thequeries.append ( querymetadata )

        print("Total Number:", N,"publications")
        print("And i want just:",globalLimit,"publications")
        print("---------------------------------------\n")

        # Spread globalLimit over the years proportionally to each year's
        # share of the N publications PubMed reports.
        for i,query in enumerate(thequeries):
            k = query["count"]
            percentage = k/float(N)
            retmax_forthisyear = int(round(globalLimit*percentage))
            query["retmax"] = retmax_forthisyear

        return thequeries
from django.contrib import admin
# Register your models here.
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.template.loader import get_template
from django.template import Context
from django.contrib.auth.models import User
from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
from gargantext_web.api import JsonHttpResponse
from urllib.request import urlopen, urlretrieve
import json
from gargantext_web.settings import MEDIA_ROOT
from datetime import datetime
import time
import os
import threading
from django.core.files import File
from gargantext_web.settings import DEBUG
from node.models import Language, ResourceType, Resource, \
Node, NodeType, Node_Resource, Project, Corpus, \
Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram
def getGlobalStats(request ):
    """Ajax endpoint: prepare the per-year PubMed queries for a search term.

    On POST, reads "query" from the form and asks MedlineFetcher to compute,
    for the last 5 years, how many of the (hard-coded) 300 publications to
    fetch per year.  Returns the resulting query list as JSON; on any other
    method, returns a placeholder payload.
    """
    print(request.method)
    # placeholder payload for non-POST requests
    results = ["bar","foo"]
    if request.method == "POST":
        query = request.POST["query"]
        print ("LOG::TIME: query =", query )
        print ("LOG::TIME: N =", 300 )
        fetcher = MedlineFetcher()
        # N is fixed at 300 for now; the client-supplied value is ignored:
        # alist = instancia.serialFetcher( 5, query , int(request.POST["N"]) )
        results = fetcher.serialFetcher( 5, query , 300 )
    return JsonHttpResponse(results)
def doTheQuery(request , project_id):
    """Run the prepared PubMed queries and build a new corpus under a project.

    On POST, expects "query" (JSON list of query dicts from getGlobalStats)
    and "string" (corpus name).  Builds one efetch URL per per-year query,
    downloads each response to a file under MEDIA_ROOT with 8 daemon worker
    threads, attaches every file as a resource of a freshly created Corpus
    node, then launches the parsing workflow (synchronously when DEBUG,
    otherwise via celery).  Always returns a JSON status payload.
    """
    alist = ["hola","mundo"]
    if request.method == "POST":
        query = request.POST["query"]
        name = request.POST["string"]
        instancia = MedlineFetcher()
        thequeries = json.loads(query)

        # One efetch URL per per-year query; fetching each URL yields the
        # PubMed XML for that slice of the result set.
        urlreqs = []
        for yearquery in thequeries:
            urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
        alist = ["tudo fixe" , "tudo bem"]

        # CLEANUP: removed dead locals from the original version
        # (thefile = "how we do this here?", unused type_id / user_id lookups).
        resource_type = ResourceType.objects.get(name="pubmed" )
        parent = Node.objects.get(id=project_id)
        node_type = NodeType.objects.get(name='Corpus')

        corpus = Node(
            user=request.user,
            parent=parent,
            type=node_type,
            name=name,
        )
        corpus.save()

        try:
            tasks = MedlineFetcher()
            tasks.ensure_dir ( MEDIA_ROOT + '/corpora/'+str(request.user)+"/" )
            # Start 8 daemon download workers consuming [url, filename] pairs.
            for i in range(8):
                t = threading.Thread(target=tasks.worker2) #thing to do
                t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
                t.start()
            for url in urlreqs:
                filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
                tasks.q.put( [url , filename]) #put a task in the queue
            tasks.q.join() # wait until every download has finished
            for filename in tasks.firstResults:
                corpus.add_resource( user=request.user, type=resource_type, file=filename )

            # Run the parse/extract workflow on the new corpus.
            try:
                if DEBUG is True:
                    corpus.workflow()
                else:
                    corpus.workflow.apply_async((), countdown=3)
                return JsonHttpResponse(["workflow","finished"])
            except Exception as error:
                print(error)
            return JsonHttpResponse(["workflow","finished","outside the try-except"])
        except Exception as error:
            # best-effort: log and fall through to the default JSON payload
            print("doTheQuery error:", error)
    data = alist
    return JsonHttpResponse(data)
def testISTEX(request , project_id):
    """
    Create a "Corpus" node under project `project_id` and fill it with
    ISTEX API results for the POSTed query.

    POST params:
      - "query":  raw query string (also used as the corpus name)
      - "string": query with spaces replaced by '+', sent to the API

    Fetches up to N (=60) documents in pages of `pagesize` (=50) through
    8 daemon worker threads, stores each JSON page as a file under
    MEDIA_ROOT/corpora/<user>/, attaches the files as corpus resources,
    then triggers the parsing workflow (synchronously when DEBUG, else
    asynchronously via celery).

    Always returns a JsonHttpResponse; non-POST requests get the defaults.
    """
    print(request.method)
    # Defaults; also what a non-POST request gets back.
    query = "-"
    query_string = "-"
    N = 60  # hard-coded sample size; "N" is not read from the POST yet
    if request.method == "POST":
        if "query" in request.POST: query = request.POST["query"]
        if "string" in request.POST: query_string = request.POST["string"].replace(" ","+")
        print(query_string , query , N)
        # One paginated ISTEX API url per chunk of `pagesize` results.
        urlreqs = []
        pagesize = 50
        tasks = MedlineFetcher()
        chunks = list(tasks.chunks(range(N), pagesize))
        for k in chunks:
            if (k[0]+pagesize)>N: pagesize = N-k[0]  # shrink the last (partial) page
            urlreqs.append("http://api.istex.fr/document/?q="+query_string+"&output=*&"+"from="+str(k[0])+"&size="+str(pagesize))
        print(urlreqs)
        # NOTE(review): "istext" (with a t) must match the ResourceType row
        # in the database -- looks like a typo of "istex"; confirm against
        # the DB before renaming either side.
        resource_type = ResourceType.objects.get(name="istext" )
        parent = Node.objects.get(id=project_id)
        node_type = NodeType.objects.get(name='Corpus')
        corpus = Node(
            user=request.user,
            parent=parent,
            type=node_type,
            name=query,
        )
        corpus.save()
        # Start 8 daemon workers consuming the download queue.
        for i in range(8):
            t = threading.Thread(target=tasks.worker2) # thing to do
            t.daemon = True  # thread dies when main thread (only non-daemon thread) exits.
            t.start()
        for url in urlreqs:
            filename = MEDIA_ROOT + '/corpora/%s/%s' % (request.user, str(datetime.now().microsecond))
            tasks.q.put( [url , filename])  # put a task in the queue
        tasks.q.join()  # block until every queued download is done
        for filename in tasks.firstResults:
            corpus.add_resource( user=request.user, type=resource_type, file=filename )
        corpus.save()
        # do the WorkFlow
        try:
            if DEBUG is True:
                corpus.workflow()
            else:
                corpus.workflow.apply_async((), countdown=3)
            return JsonHttpResponse(["workflow","finished"])
        except Exception as error:
            print(error)
    data = [query_string,query,N]
    return JsonHttpResponse(data)
...@@ -11,7 +11,15 @@ ...@@ -11,7 +11,15 @@
<script src="{% static "js/raphael-min.js"%}"></script> <script src="{% static "js/raphael-min.js"%}"></script>
<script src="{% static "js/morris.min.js"%}"></script> <script src="{% static "js/morris.min.js"%}"></script>
<link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css">
<style>
.ui-autocomplete {
z-index: 5000;
}
.ui-autocomplete .ui-menu-item
{ font-size:x-small;}
</style>
{% endblock %} {% endblock %}
...@@ -25,7 +33,7 @@ ...@@ -25,7 +33,7 @@
<div class="col-md-6"> <div class="col-md-6">
{% if project %} {% if project %}
<h1>{{ project.name }}</h1> <h1>{{ project.name }}</h1>
<!--<h3> {{number}} corpora </h3>--!> <!--<h3> {{number}} corpora </h3>-->
{% endif %} {% endif %}
</div> </div>
...@@ -35,23 +43,19 @@ ...@@ -35,23 +43,19 @@
<div id="hero-donut" style="height: 200px;"></div> <div id="hero-donut" style="height: 200px;"></div>
{% endif %} {% endif %}
<center> <center>
<button
type="button" <a data-toggle="modal" href="#addcorpus">
class="btn btn-primary btn-lg" <button
data-container="body" type="button"
data-toggle="popover" class="btn btn-primary btn-lg"
data-placement="bottom" data-container="body"
>Add a corpus</button> data-toggle="popover"
<div id="popover-content" class="hide"> data-placement="bottom"
>Add a corpus
<form enctype="multipart/form-data" action="/project/{{project.id}}/" method="post"> </button>
{% csrf_token %} </a>
{{ form.non_field_errors }} <!-- <div id="popover-content" class="hide"> -->
{{ form.as_p}}
{{ formResource.non_field_errors }}
{{ formResource.as_p}}
<input onclick='$("#semLoader").css("visibility", "visible"); $("#semLoader").show();' type="submit" name="submit" id="submit" class="btn" value="Add this corpus" /><div>
</center> </center>
</p> </p>
...@@ -147,7 +151,314 @@ ...@@ -147,7 +151,314 @@
</div> </div>
<!-- Modal -->
<div class="modal fade" id="stack1" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3>Query to PubMed</h3>
</div>
<div class="modal-body">
<p>One fine body…</p>
<input id="daquery" type="text" class="input-lg" data-tabindex="2">
<a onclick="getGlobalResults();" class="btn">Scan</a>
<div id="results"></div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button onclick="doTheQuery();" disabled id="id_thebutton" type="button" class="btn btn-primary">Explore a sample!</button>
</div>
</div><!-- /.modal-content -->
</div><!-- /.modal-dialog -->
</div><!-- /.modal -->
<!-- Modal -->
<div class="modal fade" id="addcorpus" tabindex="-1" role="dialog" aria-labelledby="myModalLabel2" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3>Add a Corpus</h3>
</div>
<div class="modal-body">
<form id="id_form" enctype="multipart/form-data" action="/project/{{project.id}}/" method="post">
{% csrf_token %}
<table cellpadding="5">
{% for field in form %}
<tr>
<th>{{field.label_tag}}</th>
<td>
{{ field.errors }}
{{ field }}
{% if field.name == "name" %}
<span onclick="getGlobalResults(this);" id="scanpubmed"></span><div id="theresults"></div>
{% endif %}
</td>
</tr>
{% endfor %}
<tr>
<th></th>
<td>
<div id="pubmedcrawl" style="visibility: hidden;">
Do you have a file already? &nbsp;
<input type="radio" id="file_yes" name="file1" onclick="FileOrNotFile(this.value);" class="file1" value="true" checked>Yes </input>
<input type="radio" id="file_no" name="file1" onclick="FileOrNotFile(this.value);" class="file1" value="false">No </input>
</div>
</td>
</tr>
</table>
</form>
<div class="modal-footer">
<!-- <div id="pubmedcrawl" align="right" style="visibility: hidden;"><a data-toggle="modal" href="#stack1">&#10142; Query directly in PubMed</a></div> -->
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button onclick='bringDaNoise();' id="submit_thing" disabled class="btn btn-primary" >Process this!</button><span id="simpleloader"></span>
</div>
</div>
</div><!-- /.modal-content -->
</div><!-- /.modal-dialog -->
</div><!-- /.modal -->
<script src="http://code.jquery.com/ui/1.11.2/jquery-ui.js"></script>
<script> <script>
// Return the value of the named browser cookie, or null when it is absent.
// (Standard Django helper, used below to read the "csrftoken" cookie so
// AJAX POSTs can send the X-CSRFToken header.)
function getCookie(name) {
    var found = null;
    if (document.cookie && document.cookie != '') {
        var prefix = name + '=';
        var parts = document.cookie.split(';');
        for (var idx = 0; idx < parts.length; idx++) {
            var candidate = jQuery.trim(parts[idx]);
            // Keep only the cookie whose string starts with "<name>=".
            if (candidate.substring(0, prefix.length) == prefix) {
                found = decodeURIComponent(candidate.substring(prefix.length));
                break;
            }
        }
    }
    return found;
}
var thequeries = []
// POST the accumulated per-year PubMed queries (global `thequeries`,
// filled by getGlobalResults) plus the raw query string to the server,
// which creates the corpus and downloads the sample.
// Reloads the page on success so the new corpus appears in the list.
function doTheQuery() {
    // Guard: the submit button is only enabled after a scan found results.
    if ( $('#submit_thing').prop('disabled') ) return;
    console.log("in doTheQuery:");
    var origQuery = $("#id_name").val()
    var pubmedifiedQuery = { query : JSON.stringify(thequeries) , string: origQuery } ;
    console.log(pubmedifiedQuery)
    var projectid = window.location.href.split("project")[1].replace(/\//g, '')//replace all the slashes
    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/project/"+projectid+"/pubmedquery/go",
        data: pubmedifiedQuery,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django CSRF protection: token comes from the cookie.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("in doTheQuery()")
            console.log(data)
            location.reload();
        },
        error: function(result) {
            console.log("in doTheQuery(). Data not found");
        }
    });
}
// "Process this!" button handler.  Dispatches on the current form state:
// - dynamic-query mode: #theresults holds a successful scan summary, so
//   POST the PubMed query via doTheQuery();
// - upload-file mode: the file input is visible, so submit the regular
//   multipart form once name/type/file are all filled in.
function bringDaNoise() {
    var theresults = $("#theresults").html()
    // A scan summary without "No results" means dynamic-query mode.
    if( theresults && theresults.search("No results")==-1 ) {
        console.log("we've in dynamic mode")
        $("#simpleloader").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
        $("#submit_thing").prop('onclick',null);
        doTheQuery();
    }
    else {
        console.log("we dont have nothing inside results div")
        if ( $("#id_file").is(':visible') ) {
            console.log("we're in upload-file mode")
            // All three fields must be non-empty before submitting.
            var namefield = $("#id_name").val()!=""
            var typefield = $("#id_type").val()!=""
            var filefield = $("#id_file").val()!=""
            if( namefield && typefield && filefield ) {
                $("#simpleloader").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
                $("#submit_thing").prop('onclick',null);
                $( "#id_form" ).submit();
            }
        }
    }
}
// "Scan" handler: POST the query in #id_name to /tests/pubmedquery, which
// returns per-year publication counts.  Stores them in the global
// `thequeries`, shows a summary in #theresults, and enables/disables the
// submit button depending on whether any publication was found.
// `value` is the clicked element; its onclick is removed while the request
// is in flight to avoid double scans.
function getGlobalResults(value){
    console.log("in getGlobalResults()")
    // AJAX to django
    var pubmedquery = $("#id_name").val()
    var Npubs = $("#id_N").val();
    if(pubmedquery=="") return;
    var formData = {query:pubmedquery , N:Npubs}
    $("#theresults").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
    console.log("disabling "+"#"+value.id)
    $("#"+value.id).prop('onclick',null);
    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/pubmedquery",
        data: formData,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django CSRF protection: token comes from the cookie.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("in getGlobalResults")
            console.log(data)
            console.log("enabling "+"#"+value.id)
            // Re-arm the Scan element for another query.
            $("#"+value.id).attr('onclick','getGlobalResults(this);');
            // $("#submit_thing").prop('disabled' , false)
            thequeries = data
            // N = total publication count across all returned years.
            var N=0,k=0;
            for(var i in thequeries) N += thequeries[i].count
            if( N>0) {
                $("#theresults").html("<i> <b>"+pubmedquery+"</b>: "+N+" publications in the last 5 years</i><br>")
                $('#submit_thing').prop('disabled', false);
            } else {
                $("#theresults").html("<i> <b>"+pubmedquery+"</b>: No results!.</i><br>")
                $('#submit_thing').prop('disabled', true);
            }
        },
        error: function(result) {
            console.log("Data not found");
        }
    });
}
// CSS events for selecting one Radio-Input
// CSS events for selecting one Radio-Input
// Toggle the "Add a corpus" form between upload-file mode ("true") and
// dynamic PubMed-query mode ("false").  `value` is the radio input's
// string value and is JSON-parsed into a boolean.
function FileOrNotFile( value ) {
    var showfile = JSON.parse(value)
    // @upload-file events
    if (showfile) {
        console.log("You've clicked the YES")
        $("#id_file").show()
        $('label[for=id_file]').show();
        $("#id_name").attr("placeholder", "");
        $("#scanpubmed").html("")
        $("#theresults").html("")
        $('#submit_thing').prop('disabled', false);
        // BUGFIX: .on('input', null) is a no-op in jQuery, so the
        // autocomplete handler added below was never removed when
        // switching back to file mode; .off() actually detaches it.
        $( "#id_name" ).off('input');
    }
    // @dynamic-query events
    else {
        console.log("You've clicked the NO")
        $("#id_file").hide()
        $('label[for=id_file]').hide();
        $("#id_name").attr("placeholder", " [ Enter your query here ] ");
        $("#id_name").focus();
        $("#scanpubmed").html('<a class="btn btn-primary">Scan</a>')//+'Get: <input id="id_N" size="2" type="text"></input>')
        $("#theresults").html("")
        $("#submit_thing").prop('disabled' , true)
        // Live autocomplete: ask PubMed for suggestions on every keystroke.
        $( "#id_name" ).on('input',function(e){
            console.log($(this).val())
            testAjax( $(this).val() )
        });
    }
}
//CSS events for changing the Select element
//CSS events for changing the Select element
// React to the resource-type <select>: PubMed gets the extra
// "file yes/no" radio choice (defaulting to file mode); any other type
// hides the radios and forces plain upload-file mode.
function CustomForSelect( selected ) {
    // show Radio-Inputs and trigger FileOrNotFile>@upload-file events
    if(selected=="pubmed") {
        console.log("show the button")
        $("#pubmedcrawl").css("visibility", "visible");
        $("#pubmedcrawl").show();
        // Simulate a click on "Yes" so the form starts in file mode.
        $("#file_yes").click();
    }
    // hide Radio-Inputs and trigger @upload-file events
    else {
        console.log("hide the button")
        $("#pubmedcrawl").css("visibility", "hidden");
        $("#id_file").show()
        $('label[for=id_file]').show();
        FileOrNotFile( "true" )
    }
}
var LastData = []
// JSONP callback invoked by the NCBI autocomplete service queried in
// testAjax().  `q` is the query echo, `data` the suggestion list.
// Caches the suggestions in the global `LastData` and (re)binds jQuery UI
// autocomplete on the #id_name field with them.
function NSuggest_CreateData(q, data) {
    console.log("in the new NSuggest_CreateData:")
    LastData = data;
    // console.log(LastData)
    console.log("adding class ui-widget")
    $("#id_name").removeClass( "ui-widget" ).addClass( "ui-widget" )
    $( "#id_name" ).autocomplete({
        source: LastData
    });
    return data;
}
// Fire a cross-domain JSONP request to NCBI's autocomplete endpoint for
// `query`.  No success handler is needed here: the JSONP payload itself
// calls NSuggest_CreateData (defined above) with the suggestions.
function testAjax( query ) {
    LastData = []
    if(!query || query=="") return;
    var pubmedquery = encodeURIComponent(query)
    $.ajax({
        type: 'GET',
        url: "http://www.ncbi.nlm.nih.gov/portal/utils/autocomp.fcgi?dict=pm_related_queries_2&q="+pubmedquery,
        // data:"db="+db+"&query="+query,
        contentType: "application/json",
        dataType: 'jsonp'
    });
    return false;
}
// POST an ISTEX query to the server, which creates a corpus and downloads
// up to Npubs documents.  Reloads the page on success.
// NOTE(review): Npubs is accepted but never sent; the server currently
// uses its own hard-coded N -- confirm before wiring it through.
function testISTEX(query,Npubs) {
    console.log("in testISTEX:");
    if(!query || query=="") return;
    var origQuery = query
    var pubmedifiedQuery = { query : query , string: query }
    // console.log(pubmedifiedQuery)
    var projectid = window.location.href.split("project")[1].replace(/\//g, '')//replace all the slashes
    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/project/"+projectid+"/ISTEXquery/go",
        data: pubmedifiedQuery,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django CSRF protection: token comes from the cookie.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("ajax_success: in testISTEX()")
            console.log(data)
            location.reload();
        },
        error: function(result) {
            console.log("in testISTEX(). Data not found");
        }
    });
}
// Morris Donut Chart // Morris Donut Chart
Morris.Donut({ Morris.Donut({
element: 'hero-donut', element: 'hero-donut',
...@@ -163,6 +474,7 @@ ...@@ -163,6 +474,7 @@
//colors: ["#30a1ec", "#76bdee"], //colors: ["#30a1ec", "#76bdee"],
formatter: function (y) { return y + "%" } formatter: function (y) { return y + "%" }
}); });
</script> </script>
......
{% extends "menu.html" %}
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/morris.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/jquery.easy-pie-chart.css"%}">
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script src="{% static "js/raphael-min.js"%}"></script>
<script src="{% static "js/morris.min.js"%}"></script>
{% endblock %}
{% block content %}
<div class="container theme-showcase" role="main">
<div class="jumbotron">
<div class="row">
<div class="col-md-6">
{% if project %}
<h1>{{ project.name }}</h1>
<!--<h3> {{number}} corpora </h3>-->
{% endif %}
</div>
<div class="col-md-4">
<p>
{% if donut %}
<div id="hero-donut" style="height: 200px;"></div>
{% endif %}
<center>
<button
type="button"
class="btn btn-primary btn-lg"
data-container="body"
data-toggle="popover"
data-placement="bottom"
>Add a corpus</button>
<div id="popover-content" class="hide">
<form enctype="multipart/form-data" action="/project/{{project.id}}/" method="post">
{% csrf_token %}
{{ form.non_field_errors }}
{{ form.as_p}}
{{ formResource.non_field_errors }}
{{ formResource.as_p}}
<input onclick='$("#semLoader").css("visibility", "visible"); $("#semLoader").show();' type="submit" name="submit" id="submit" class="btn" value="Add this corpus" /><div>
</center>
</p>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Add jumbotron container for each type of corpus (presse, science etc.) -->
<div id="semLoader" style="position:absolute; top:50%; left:40%; width:80px; visibility: hidden;">
<img src="{% static "js/libs/img2/loading-bar.gif" %}"></img>
</div>
<div class="container">
{% if list_corpora %}
<h1>Resources</h1>
<h2>Corpora</h2>
<ul>
{% for key, corpora in list_corpora.items %}
<li>{{ key }}</li>
<ul>
{% for corpus in corpora %}
<li> {% ifnotequal corpus.count 0 %}
<a href="/project/{{project.id}}/corpus/{{corpus.id}}">
{{corpus.name}}
</a>
, {{ corpus.count }} Documents
{% else %}
{{corpus.name}} : <img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Processing, drink a cup of tea, and refresh the page :)
{% endifnotequal %}
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content='
<ul>
<li> Rename </li>
<li> Add new documents </li>
<li><a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete</a></li>
</ul>
'>Manage</button>
</li>
{% endfor %}
</ul>
{% endfor %}
</ul>
{% endif %}
{% if list_corporax %}
<div class="col-md-4">
<h3><a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a>
</h3>
<h4>{{ corpus.count }} Documents </h4>
<h5>Activity:</h5>
<div class="chart" data-percent="73">73%</div>
</div>
{% endif %}
{% if whitelists %}
<h2>Lists of Ngrams</h2>
<h3>White Lists</h3>
{% for list in whitelists %}
<ul>
<li> {{list.name }}
</ul>
{% endfor %}
{% endif %}
{% if whitelists %}
<h3>Black Lists</h3>
{% for list in blacklists %}
<ul>
<li> {{list.name }}
</ul>
{% endfor %}
{% endif %}
{% if cooclists %}
<h2>Results (graphs)</h2>
<h3>Cooccurrences Lists</h3>
{% for list in cooclists %}
<ul>
<li> {{list.name }}
</ul>
{% endfor %}
{% endif %}
</div>
<script>
// Morris Donut Chart
Morris.Donut({
element: 'hero-donut',
data: [
{% if donut %}
{% for part in donut %}
{label: '{{ part.source }}', value: {{ part.part }} },
{% endfor %}
{% endif %}
],
colors: ["@white", "@white"],
//colors: ["#30a1ec", "#76bdee"],
formatter: function (y) { return y + "%" }
});
</script>
{% endblock %}
...@@ -63,7 +63,7 @@ function deleteDuplicates(url) { ...@@ -63,7 +63,7 @@ function deleteDuplicates(url) {
success: function(data) { success: function(data) {
console.log("in DeleteDuplicates") console.log("in DeleteDuplicates")
console.log(data) console.log(data)
$("#delAll").remove(); location.reload();
}, },
error: function(result) { error: function(result) {
console.log("Data not found"); console.log("Data not found");
......
...@@ -240,6 +240,7 @@ ...@@ -240,6 +240,7 @@
<li ng-repeat="filter in filters"> <li ng-repeat="filter in filters">
<button ng-click="removeFilter($index)" title="remove this filter">x</button> <button ng-click="removeFilter($index)" title="remove this filter">x</button>
<span>...where the </span> <span>...where the </span>
<select ng-model="filter.entity" ng-options="entity as entity.key for entity in entities"></select> <select ng-model="filter.entity" ng-options="entity as entity.key for entity in entities"></select>
<span ng-if="filter.entity.key != 'ngrams'"> <span ng-if="filter.entity.key != 'ngrams'">
<select ng-if="filter.entity" ng-model="filter.column" ng-options="column as column.key for column in filter.entity.columns | orderBy:'key'"></select> <select ng-if="filter.entity" ng-model="filter.column" ng-options="column as column.key for column in filter.entity.columns | orderBy:'key'"></select>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment