Commit 44dae6cb authored by PkSM3's avatar PkSM3

[UPDATED] give up 'cause of the framework

parent 4a71627b
...@@ -269,11 +269,13 @@ from analysis.tfidf import tfidf ...@@ -269,11 +269,13 @@ from analysis.tfidf import tfidf
def do_tfidf(corpus, reset=True): def do_tfidf(corpus, reset=True):
print("doing tfidf") print("doing tfidf")
print("\t",corpus.type)
with transaction.atomic(): with transaction.atomic():
if reset==True: if reset==True:
NodeNodeNgram.objects.filter(nodex=corpus).delete() NodeNodeNgram.objects.filter(nodex=corpus).delete()
if isinstance(corpus, Node) and corpus.type.name == "Corpus": if isinstance(corpus, Node) and corpus.type.name == "Corpus":
print(Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")))
for document in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")): for document in Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")):
for node_ngram in Node_Ngram.objects.filter(node=document): for node_ngram in Node_Ngram.objects.filter(node=document):
try: try:
......
...@@ -3,13 +3,18 @@ from lxml import etree ...@@ -3,13 +3,18 @@ from lxml import etree
from .FileParser import FileParser from .FileParser import FileParser
from ..NgramsExtractors import * from ..NgramsExtractors import *
from datetime import datetime from datetime import datetime
from io import BytesIO
class PubmedFileParser(FileParser): class PubmedFileParser(FileParser):
def _parse(self, file): def _parse(self, file):
# open the file as XML # open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True) xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
xml = etree.parse(file, parser=xml_parser)
xml = ""
if type(file)==bytes: xml = etree.parse( BytesIO(file) , parser=xml_parser)
else: xml = etree.parse(file, parser=xml_parser)
xml_articles = xml.findall('PubmedArticle') xml_articles = xml.findall('PubmedArticle')
# initialize the list of metadata # initialize the list of metadata
metadata_list = [] metadata_list = []
...@@ -75,7 +80,7 @@ class PubmedFileParser(FileParser): ...@@ -75,7 +80,7 @@ class PubmedFileParser(FileParser):
if "realdate_year_" in metadata: metadata.pop("realdate_year_") if "realdate_year_" in metadata: metadata.pop("realdate_year_")
if "realdate_month_" in metadata: metadata.pop("realdate_month_") if "realdate_month_" in metadata: metadata.pop("realdate_month_")
if "realdate_day_" in metadata: metadata.pop("realdate_day_") if "realdate_day_" in metadata: metadata.pop("realdate_day_")
metadata_list.append(metadata) metadata_list.append(metadata)
# return the list of metadata # return the list of metadata
return metadata_list return metadata_list
...@@ -4,10 +4,12 @@ from django.shortcuts import render ...@@ -4,10 +4,12 @@ from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseRedirect from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.template.loader import get_template from django.template.loader import get_template
from django.template import Context from django.template import Context
from django.contrib.auth.models import User
from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher from scrap_pubmed.MedlineFetcherDavid2015 import MedlineFetcher
from gargantext_web.api import JsonHttpResponse from gargantext_web.api import JsonHttpResponse
from urllib.request import urlopen, urlretrieve
import json import json
from node.models import Language, ResourceType, Resource, \ from node.models import Language, ResourceType, Resource, \
...@@ -28,10 +30,13 @@ def getGlobalStats(request ): ...@@ -28,10 +30,13 @@ def getGlobalStats(request ):
return JsonHttpResponse(data) return JsonHttpResponse(data)
from parsing.FileParsers import PubmedFileParser
def doTheQuery(request , project_id): def doTheQuery(request , project_id):
alist = ["hola","mundo"] alist = ["hola","mundo"]
if request.method == "POST": if request.method == "POST":
query = request.POST["query"] query = request.POST["query"]
name = request.POST["string"] name = request.POST["string"]
...@@ -42,12 +47,7 @@ def doTheQuery(request , project_id): ...@@ -42,12 +47,7 @@ def doTheQuery(request , project_id):
urlreqs = [] urlreqs = []
for yearquery in thequeries: for yearquery in thequeries:
print("fetching:")
print(yearquery)
urlreqs.append( instancia.medlineEfetchRAW( yearquery ) ) urlreqs.append( instancia.medlineEfetchRAW( yearquery ) )
print(" - - - - - ")
print( "============================" )
print(urlreqs)
alist = ["tudo fixe" , "tudo bem"] alist = ["tudo fixe" , "tudo bem"]
""" """
...@@ -56,47 +56,71 @@ def doTheQuery(request , project_id): ...@@ -56,47 +56,71 @@ def doTheQuery(request , project_id):
eFetchResult = urlopen(url) eFetchResult = urlopen(url)
eFetchResult.read() # this will output the XML... normally you write this to a XML-file. eFetchResult.read() # this will output the XML... normally you write this to a XML-file.
""" """
# print("finding out project ID:")
# print(project_id)
# thefile = "how we do this here?"
# resource_type = ResourceType()
# resource_type.name = name
# print("-------------")
# print(name,"|",resource_type,"|",thefile)
# print("-------------")
# print(request.user)
# try:
# parent = Node.objects.get(id=project_id)
# print("IMMA HEEEEERE 01")
# node_type = NodeType.objects.get(name='Corpus')
# print("IMMA HEEEEERE 02")
# corpus = Node(
# user=request.user,
# parent=parent,
# type=node_type,
# name=name,
# )
# print("IMMA HEEEEERE 03")
# corpus.save()
# print("IMMA HEEEEERE 04")
# corpus.add_resource(
# user=request.user,
# type=resource_type,
# file=urlreqs
# )
# print("IMMA HEEEEERE 05")
# except Exception as error:
# print(error)
thefile = "how we do this here?"
resource_type = ResourceType()
resource_type.name = name
try:
parent = Node.objects.get(id=project_id)
node_type = NodeType.objects.get(name='Corpus')
type_id = NodeType.objects.get(name='Document').id
user_id = User.objects.get( username=request.user ).id
corpus = Node(
user=request.user,
parent=parent,
type=node_type,
name=name,
)
corpus.save()
parser = PubmedFileParser()
metadata_list = []
for url in urlreqs:
data = urlopen(url)
metadata_list += parser.parse( data.read() )
# corpus.add_resource( user=request.user, type=resource_type, file=data.read() )
break
from parsing.Caches import LanguagesCache
langages_cache = LanguagesCache()
for i, metadata_values in enumerate(metadata_list):
name = metadata_values.get('title', '')[:200]
language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None,
if isinstance(language, tuple):
language = language[0]
Node(
user_id = user_id,
type_id = type_id,
name = name,
parent = parent,
language_id = language.id if language else None,
metadata = metadata_values
).save()
parent.children.all().make_metadata_filterable()
type_document = NodeType.objects.get(name='Document')
print("printing here 01")
parent.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
print("printing here 02")
print("now we've to apply do_tfidf...")
# thetitles = parent.children.filter(type_id=type_document.pk)
# print(Node.objects.filter(parent=parent))
# from analysis.functions import do_tfidf
# do_tfidf(corpus)
print("ca va?")
except Exception as error:
print("lele",error)
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
{{ formResource.non_field_errors }} {{ formResource.non_field_errors }}
{{ formResource.as_p}} {{ formResource.as_p}}
<input onclick='$("#semLoader").css("visibility", "visible"); $("#semLoader").show();' type="submit" name="submit" id="submit" class="btn" value="Add this corpus" /><div> <input onclick='$("#semLoader").css("visibility", "visible"); $("#semLoader").show();' type="submit" name="submit" id="submit" class="btn" value="Add this corpus" /><div>
<div id="pubmedcrawl" align="right"><a data-toggle="modal" href="#stack1">&#10142; Query directly in PubMed</a></div>
</center> </center>
</p> </p>
...@@ -147,7 +148,114 @@ ...@@ -147,7 +148,114 @@
</div> </div>
<!-- Modal -->
<div class="modal fade" id="stack1" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3>Query to PubMed</h3>
</div>
<div class="modal-body">
<p>One fine body…</p>
<input id="daquery" type="text" class="input-lg" data-tabindex="2">
<a onclick="getGlobalResults();" class="btn">Scan</a>
<div id="results"></div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button onclick="doTheQuery();" disabled id="id_thebutton" type="button" class="btn btn-primary">Explore a sample!</button>
</div>
</div><!-- /.modal-content -->
</div><!-- /.modal-dialog -->
</div><!-- /.modal -->
<script> <script>
/**
 * Look up a cookie by name in document.cookie.
 *
 * @param {string} name - cookie name to search for.
 * @returns {?string} the URI-decoded value of the first matching cookie,
 *     or null when the cookie string is empty or holds no such cookie.
 */
function getCookie(name) {
    var rawCookies = document.cookie;
    if (!rawCookies) {
        return null;
    }
    var prefix = name + '=';
    var entries = rawCookies.split(';');
    for (var idx = 0; idx < entries.length; idx++) {
        var entry = jQuery.trim(entries[idx]);
        // Only an entry that begins with "<name>=" is the cookie we want.
        if (entry.substring(0, prefix.length) == prefix) {
            return decodeURIComponent(entry.substring(prefix.length));
        }
    }
    return null;
}
var thequeries = []
/**
 * POST the scanned PubMed queries to the server for the current project.
 *
 * Reads the free-text query from #daquery and the per-period queries kept in
 * the global `thequeries` (filled in by getGlobalResults), then sends both to
 * /tests/project/<id>/pubmedquery/go. Does nothing while #id_thebutton is
 * disabled, or when no project id can be read from the page URL.
 */
function doTheQuery() {
    if ( $('#id_thebutton').prop('disabled') ) return;
    console.log("in doTheQuery:");
    var origQuery = $("#daquery").val();
    var pubmedifiedQuery = { query : JSON.stringify(thequeries) , string: origQuery } ;
    console.log(pubmedifiedQuery);

    // Use the pathname only: the full href also carries any "?query" or
    // "#fragment", which would otherwise leak into the project id.
    var afterProject = window.location.pathname.split("project")[1];
    if (afterProject === undefined) {
        console.log("in doTheQuery(). No project id found in the URL");
        return;
    }
    var projectid = afterProject.replace(/\//g, ''); // drop all the slashes

    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/project/"+projectid+"/pubmedquery/go",
        data: pubmedifiedQuery,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django rejects POSTs that do not carry the CSRF token.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("in doTheQuery()");
            console.log(data);
        },
        error: function(result) {
            console.log("in doTheQuery(). Data not found");
        }
    });
}
/**
 * Ask the server how many publications match the query typed in #daquery.
 *
 * Shows a loading image in #results while the request is running, stores the
 * server answer in the global `thequeries`, and enables #id_thebutton only
 * when the summed `count` of the returned entries is positive. The loading
 * image is always replaced once the request finishes, whatever the outcome.
 */
function getGlobalResults(){
    // AJAX to django
    var pubmedquery = $("#daquery").val();
    var formData = {query:pubmedquery};
    $("#results").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
    // Results of a previous scan no longer match the text box: block the
    // "explore" button until this scan reports something.
    $('#id_thebutton').prop('disabled', true);
    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/pubmedquery",
        data: formData,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django rejects POSTs that do not carry the CSRF token.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("in getGlobalResults");
            console.log(data);
            thequeries = data;
            // NOTE(review): assumes every entry of the answer has a numeric
            // `count` field - confirm against the server view.
            var N = 0;
            for (var i in thequeries) N += thequeries[i].count;
            if (N > 0) {
                $("#results").html("Result: "+N+" publications in the last 5 years");
                $('#id_thebutton').prop('disabled', false);
            } else {
                // Replace the loading image instead of leaving it spinning.
                $("#results").html("Result: 0 publications in the last 5 years");
            }
        },
        error: function(result) {
            console.log("Data not found");
            // Replace the loading image so the user can see the scan ended.
            $("#results").html("The query could not be completed, please try again");
        }
    });
}
// Morris Donut Chart // Morris Donut Chart
Morris.Donut({ Morris.Donut({
element: 'hero-donut', element: 'hero-donut',
...@@ -163,6 +271,7 @@ ...@@ -163,6 +271,7 @@
//colors: ["#30a1ec", "#76bdee"], //colors: ["#30a1ec", "#76bdee"],
formatter: function (y) { return y + "%" } formatter: function (y) { return y + "%" }
}); });
</script> </script>
......
...@@ -52,7 +52,6 @@ ...@@ -52,7 +52,6 @@
{{ formResource.non_field_errors }} {{ formResource.non_field_errors }}
{{ formResource.as_p}} {{ formResource.as_p}}
<input onclick='$("#semLoader").css("visibility", "visible"); $("#semLoader").show();' type="submit" name="submit" id="submit" class="btn" value="Add this corpus" /><div> <input onclick='$("#semLoader").css("visibility", "visible"); $("#semLoader").show();' type="submit" name="submit" id="submit" class="btn" value="Add this corpus" /><div>
<div id="pubmedcrawl" align="right"><a data-toggle="modal" href="#stack1">&#10142; Query directly in PubMed</a></div>
</center> </center>
</p> </p>
...@@ -148,114 +147,7 @@ ...@@ -148,114 +147,7 @@
</div> </div>
<!-- Modal -->
<div class="modal fade" id="stack1" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
<h3>Query to PubMed</h3>
</div>
<div class="modal-body">
<p>One fine body…</p>
<input id="daquery" type="text" class="input-lg" data-tabindex="2">
<a onclick="getGlobalResults();" class="btn">Scan</a>
<div id="results"></div>
</div>
<div class="modal-footer">
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
<button onclick="doTheQuery();" disabled id="id_thebutton" type="button" class="btn btn-primary">Explore a sample!</button>
</div>
</div><!-- /.modal-content -->
</div><!-- /.modal-dialog -->
</div><!-- /.modal -->
<script> <script>
/**
 * Look up a cookie by name in document.cookie.
 *
 * @param {string} name - cookie name to search for.
 * @returns {?string} the URI-decoded value of the first matching cookie,
 *     or null when the cookie string is empty or holds no such cookie.
 */
function getCookie(name) {
    var rawCookies = document.cookie;
    if (!rawCookies) {
        return null;
    }
    var prefix = name + '=';
    var entries = rawCookies.split(';');
    for (var idx = 0; idx < entries.length; idx++) {
        var entry = jQuery.trim(entries[idx]);
        // Only an entry that begins with "<name>=" is the cookie we want.
        if (entry.substring(0, prefix.length) == prefix) {
            return decodeURIComponent(entry.substring(prefix.length));
        }
    }
    return null;
}
var thequeries = []
/**
 * POST the scanned PubMed queries to the server for the current project.
 *
 * Reads the free-text query from #daquery and the per-period queries kept in
 * the global `thequeries` (filled in by getGlobalResults), then sends both to
 * /tests/project/<id>/pubmedquery/go. Does nothing while #id_thebutton is
 * disabled, or when no project id can be read from the page URL.
 */
function doTheQuery() {
    if ( $('#id_thebutton').prop('disabled') ) return;
    console.log("in doTheQuery:");
    var origQuery = $("#daquery").val();
    var pubmedifiedQuery = { query : JSON.stringify(thequeries) , string: origQuery } ;
    console.log(pubmedifiedQuery);

    // Use the pathname only: the full href also carries any "?query" or
    // "#fragment", which would otherwise leak into the project id.
    var afterProject = window.location.pathname.split("project")[1];
    if (afterProject === undefined) {
        console.log("in doTheQuery(). No project id found in the URL");
        return;
    }
    var projectid = afterProject.replace(/\//g, ''); // drop all the slashes

    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/project/"+projectid+"/pubmedquery/go",
        data: pubmedifiedQuery,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django rejects POSTs that do not carry the CSRF token.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("in doTheQuery()");
            console.log(data);
        },
        error: function(result) {
            console.log("in doTheQuery(). Data not found");
        }
    });
}
/**
 * Ask the server how many publications match the query typed in #daquery.
 *
 * Shows a loading image in #results while the request is running, stores the
 * server answer in the global `thequeries`, and enables #id_thebutton only
 * when the summed `count` of the returned entries is positive. The loading
 * image is always replaced once the request finishes, whatever the outcome.
 */
function getGlobalResults(){
    // AJAX to django
    var pubmedquery = $("#daquery").val();
    var formData = {query:pubmedquery};
    $("#results").html('<img width="30px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img>')
    // Results of a previous scan no longer match the text box: block the
    // "explore" button until this scan reports something.
    $('#id_thebutton').prop('disabled', true);
    $.ajax({
        // contentType: "application/json",
        url: window.location.origin+"/tests/pubmedquery",
        data: formData,
        type: 'POST',
        beforeSend: function(xhr) {
            // Django rejects POSTs that do not carry the CSRF token.
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(data) {
            console.log("in getGlobalResults");
            console.log(data);
            thequeries = data;
            // NOTE(review): assumes every entry of the answer has a numeric
            // `count` field - confirm against the server view.
            var N = 0;
            for (var i in thequeries) N += thequeries[i].count;
            if (N > 0) {
                $("#results").html("Result: "+N+" publications in the last 5 years");
                $('#id_thebutton').prop('disabled', false);
            } else {
                // Replace the loading image instead of leaving it spinning.
                $("#results").html("Result: 0 publications in the last 5 years");
            }
        },
        error: function(result) {
            console.log("Data not found");
            // Replace the loading image so the user can see the scan ended.
            $("#results").html("The query could not be completed, please try again");
        }
    });
}
// Morris Donut Chart // Morris Donut Chart
Morris.Donut({ Morris.Donut({
element: 'hero-donut', element: 'hero-donut',
...@@ -271,7 +163,6 @@ ...@@ -271,7 +163,6 @@
//colors: ["#30a1ec", "#76bdee"], //colors: ["#30a1ec", "#76bdee"],
formatter: function (y) { return y + "%" } formatter: function (y) { return y + "%" }
}); });
</script> </script>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment