[UPDATE] last stable version

f9a08e53 · PkSM3 · fd2ae499 · f9a08e53 · f9a08e53 · f9a08e53
Commit f9a08e53 authored Mar 05, 2015 by PkSM3
Hide whitespace changes
Inline Side-by-side

Showing with 48 additions and 9 deletions

urls.py gargantext_web/urls.py +1 -0

views.py gargantext_web/views.py +29 -0

models.py node/models.py +16 -7

project.html templates/project.html +2 -2

No files found.
--- a/gargantext_web/urls.py
+++ b/gargantext_web/urls.py
@@ -51,6 +51,7 @@ urlpatterns = patterns('',
    url(r'^corpus/(\d+)/node_link.json$', views.node_link),
    url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
    url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
+    url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2),

    # Data management
    url(r'^api$', gargantext_web.api.Root),

--- a/gargantext_web/views.py
+++ b/gargantext_web/views.py
@@ -912,6 +912,35 @@ def nodeinfo(request , node_id):
    return HttpResponse(html)


+def tfidf2(request, corpus_id, ngram_id):
+    """
+    Returns, in json format, selected metadata of up to 6 nodes whose ids are packed in ngram_id
+    (ids joined by "a"), in the order given. request and corpus_id are not used; no TFIDF is computed.
+    """
+    # it will receive something like:  api/tfidf2/corpus_id/ID1aID2aID3aID4... (each ID is looked up as a Node id)
+    docsids = ngram_id.split("a")
+    # NOTE(review): Node.objects.get presumably raises on an empty or unknown id -- confirm and handle if needed
+    tfidf_list = []
+    for i in docsids:
+        pub = Node.objects.get(id=i)
+        finalpub = {}
+        finalpub["id"] = pub.id
+        pubmetadata = pub.metadata
+        if "title" in pubmetadata: finalpub["title"] = pubmetadata['title']
+        if "publication_date" in pubmetadata: finalpub["publication_date"] = pubmetadata['publication_date']
+        if "journal" in pubmetadata: finalpub["journal"] = pubmetadata['journal']
+        if "authors" in pubmetadata: finalpub["authors"] = pubmetadata['authors']
+        if "fields" in pubmetadata: finalpub["fields"] = pubmetadata['fields']
+        tfidf_list.append(finalpub) # building a dictionary with only the available attributes
+        if len(tfidf_list)==6: break # max 6 papers
+    
+    data = json.dumps(tfidf_list) 
+    # NOTE(review): data is already a JSON string at this point; verify that JsonHttpResponse
+    # does not serialize it a second time.
+
+    # data = ["hola","mundo"]
+    return JsonHttpResponse(data)
+
 def tfidf(request, corpus_id, ngram_id):
    """
    Takes IDs of corpus and ngram and returns list of relevent documents in json format

--- a/node/models.py
+++ b/node/models.py
@@ -24,6 +24,9 @@ from gargantext_web.settings import MEDIA_ROOT
 from celery.contrib.methods import task_method
 from celery import current_app

+import os
+import subprocess
+

 # Some useful functions
 # TODO: start the function name with an underscore (private)
@@ -278,8 +281,6 @@ class Node(CTENode):
        self.metadata['Processing'] = 0
        self.save()

-
-
    def runInParallel(self, *fns):
        proc = []
        for fn in fns:
@@ -484,6 +485,7 @@ class Node(CTENode):
        labels = dict()
        weight = dict()

+        print("PRINTING NUMBER OF NODES 01:",len(G))
        for e in G.edges_iter():
            n1 = e[0]
            n2 = e[1]
@@ -528,6 +530,7 @@ class Node(CTENode):
                    G.add_edge(node, "cluster " + str(partition[node]), weight=3)
                except Exception as error:
                    print("ERROR:",error)
+            print("PRINTING NUMBER OF NODES 02:",len(G))
            data = json_graph.node_link_data(G)

        elif type == "adjacency":
@@ -545,14 +548,20 @@ class Node(CTENode):
        
        return data

-        
-
    def workflow__MOV(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
        import time
        total = 0
        self.metadata['Processing'] = 1
        self.save()

+        # # pwd = subprocess.Popen("cd /srv/gargantext/parsing/Taggers/nlpserver && pwd", stdout=subprocess.PIPE).stdout.read()
+        # # print(subprocess.Popen(['ls', '-lah'], stdout=subprocess.PIPE).communicate()[0].decode('utf-8'))
+        # print("activating nlpserver:")
+        # command = 'cd parsing/Taggers/nlpserver; python3 server.py'
+        # process = subprocess.Popen(command,stdout=subprocess.PIPE , stderr=subprocess.DEVNULL , shell=True)
+        
+
+
        print("LOG::TIME: In workflow()    parse_resources__MOV()")
        start = time.time()
        theMetadata = self.parse_resources__MOV()
@@ -575,6 +584,9 @@ class Node(CTENode):
        total += (end - start)
        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams__MOV() [s]",(end - start))

+        # process.kill()
+        # print("ok, process killed")
+
        start = time.time()
        print("LOG::TIME: In workflow()    do_tfidf()")
        resultDict = self.do_tfidf__MOV( FreqList , theMetadata)
@@ -591,9 +603,6 @@ class Node(CTENode):
        total += (end - start)
        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_coocmatrix() [s]",(end - start))

-        # import pprint
-        # pprint.pprint(jsongraph)
-
        print("the user:",self.user)
        print("the project id:",self.parent.id)
        print("the corpus id:",self.id)

--- a/templates/project.html
+++ b/templates/project.html
@@ -424,8 +424,8 @@
 		//CSS events for changing the Select element
 		function CustomForSelect( selected ) {
 			// show Radio-Inputs and trigger FileOrNotFile>@upload-file events
-			//if(selected=="pubmed" || selected=="istext") {
-			if(selected=="pubmed") {
+			if(selected=="pubmed" || selected=="istext") {
+			// if(selected=="pubmed") {
 				console.log("show the button for: "+selected)
 				$("#pubmedcrawl").css("visibility", "visible"); 
 				$("#pubmedcrawl").show();