[FEAT] Diff of graph implemented. TODO: change colors in Graph Explorer....

[FEAT] Diff of graph implemented. TODO: change colors in Graph Explorer. Instead of yellow -> red : blue -> red.

[FEAT] Diff of graph implemented. TODO: change colors in Graph Explorer....
[FEAT] Diff of graph implemented. TODO: change colors in Graph Explorer. Instead of yellow -> red : blue -> red.
b0d02616 · delanoe · 69006133 · b0d02616
Commit b0d02616 authored May 12, 2016 by delanoe
Hide whitespace changes
Inline Side-by-side

Showing with 73 additions and 46 deletions

intersection.py graphExplorer/intersection.py +73 -46

No files found.
--- a/graphExplorer/intersection.py
+++ b/graphExplorer/intersection.py
@@ -11,66 +11,93 @@ import datetime
 import ast
 import networkx as nx

-def intersection(request , corpuses_ids, measure='cooc'):
-    '''
-    intersection :: (str(Int) + "a" str(Int)) -> Dict(Ngram.id :: Int, Score :: Int)
-    intersection = gives the intersection of two graphs

+def doc_freq(corpus_id, node_ids):
    '''
-    if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0 :
-
-        node_ids = [int(i) for i in (ast.literal_eval( request.POST["nodeids"] )) ]
-        # Here are the visible nodes of the initial semantic map.
-
-        corpuses_ids = corpuses_ids.split('a')
-
-        corpuses_ids = [int(i) for i in corpuses_ids]
-        # corpus[1] will be the corpus to compare
+    doc_freq :: Corpus_id -> [(Ngram_id, Int)]
+    Given a corpus, compute number of documents that have the ngram in it.
+    '''
+    return ( session.query(NodeNgram.ngram_id, func.count(NodeNgram.node_id))
+                    .join(Node, NodeNgram.node_id == Node.id)
+                    .filter( Node.parent_id == corpus_id
+                           , Node.typename== 'DOCUMENT')
+                    .filter( NodeNgram.weight > 0 
+                           , NodeNgram.ngram_id.in_(node_ids) )
+                    .group_by(NodeNgram.ngram_id)
+                    .all()
+                  )
+
+def doc_ngram_representativity(corpus_id, node_ids):
+    '''
+    doc_ngram_representativity :: Corpus_ID -> Dict Ngram_id Float
+    Given a corpus, compute part of of documents that have the ngram it it.
+    '''
+    nodes_count = ( session.query(Node)
+                           .filter( Node.parent_id == corpus_id
+                                  , Node.typename == 'DOCUMENT'
+                                  )
+                           .count()
+                  )

+    result = dict()
+    for ngram_id, somme in doc_freq(corpus_id, node_ids):
+        result[ngram_id] = somme / nodes_count

-            
-        def representativity(corpus_id):
-            ngrams_data = ( session.query(Ngram.id, NodeNgram)
-                                .join(Node, NodeNgram.node_id == Node.id)
-                                .filter( Node.parent_id == corpus_id
-                                       , Node.typename== 'DOCUMENT')
-                                .filter( NodeNgram.weight > 0 )
-                                .filter( Ngram.id.in_(node_ids) )
-                                .group_by(Ngram.id)
-                                .count()
-                                )
-            nodes_count = ( session.query(Node)
-                                   .filter( Node.parent_id == corpus_id
-                                          , Node.typename == 'DOCUMENT'
-                                          )
-                                   .count()
-                                   )[0]
-        
-            result = dict()
-            for ngram_id, somme in ngrams_data:
-                result[ngram_id] = somme / nodes_count
+    return result

-            return result
+def compare_corpora(Corpus_id_A, Corpus_id_B, node_ids):
+    '''
+    compare_corpora :: Corpus_id -> Corpus_id -> Dict Ngram_id Float
+    Given two corpus :
+        - if corpora are the same, it return :
+            (dict of document frequency per ngram as key)
+        - if corpora are different, it returns :
+            doc_ngram_representativit(Corpus_id_A) / doc_ngram_representativity(Corpus_id_B)
+            (as dict per ngram as key)
+    '''

-        data_0 = representativity(corpuses_ids[0])
-        data_1 = representativity(corpuses_ids[1])
-        
+    result = dict()
+    
+    if int(Corpus_id_A) == int(Corpus_id_B):
+        for ngram_id, somme in doc_freq(Corpus_id_A, node_ids):
+            result[ngram_id] = somme
+    
+    else:
+
+        data_A = doc_ngram_representativity(Corpus_id_A, node_ids)
+        data_B = doc_ngram_representativity(Corpus_id_B, node_ids)
+    
        queue     = list()

-        for k in data_0.keys():
-            if data_1[k] == 0:
+        for k in data_A.keys():
+            if k not in data_B.keys():
                queue.append(k)
            else:
-                FinalDict[k] = data_0[k] / data_1[k]
+                result[k] = data_A[k] / data_B[k]

-        maximum = max([ FinalDict[k] for k in FinalDict.keys()])
+        maximum = max([ result[k] for k in result.keys()])
+        minimum = min([ result[k] for k in result.keys()])

        for k in queue:
-            FinalDict[k] = maximum +1
+            result[k] = minimum
+
+    return result
+
+def intersection(request , corpuses_ids, measure='cooc'):
+    '''
+    intersection :: (str(Int) + "a" str(Int)) -> Dict(Ngram.id :: Int, Score :: Int)
+    intersection = returns as Json Http Response the intersection of two graphs
+
+    '''
+    if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"])>0 :

-        print("-" * 100)
-        print(FinalDict)
+        node_ids = [int(i) for i in (ast.literal_eval( request.POST["nodeids"] )) ]
+        # Here are the visible nodes of the initial semantic map.

-        return JsonHttpResponse(FinalDict)
+        corpuses_ids = corpuses_ids.split('a')

+        corpuses_ids = [int(i) for i in corpuses_ids]
+        # corpus[1] will be the corpus to compare
+        
+        return JsonHttpResponse(compare_corpora(corpuses_ids[0], corpuses_ids[1], node_ids))