[TUTO] Phylomemy Notebook created with main functions to explore the subject...

[TUTO] Phylomemy Notebook created with main functions to explore the subject in collaboration with David.

[TUTO] Phylomemy Notebook created with main functions to explore the subject...
[TUTO] Phylomemy Notebook created with main functions to explore the subject in collaboration with David.
29f9c8a0 · Alexandre Delanoë · 11255619 · 29f9c8a0 · 29f9c8a0 · 29f9c8a0
Commit 29f9c8a0 authored Oct 12, 2017 by Alexandre Delanoë
6 changed files
--- a/gargantext/util/toolchain/metric_tfidf.py
+++ b/gargantext/util/toolchain/metric_tfidf.py
@@ -23,7 +23,7 @@ from datetime             import datetime
 def t():
    return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

-def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
+def compute_occs(corpus, overwrite_id = None, groupings_id = None, year=None, start=None, end=None, interactiv=False):
    """
    Calculates sum of occs per ngram (or per mainform if groups) within corpus
                 (used as info in the ngrams table view)
@@ -61,6 +61,8 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
                    .group_by(NodeNgram.ngram_id)
                   )

+        if year is not None:
+            occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))

    #   difficult case: with groups
    #                   ------------
@@ -108,6 +110,10 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
                    # for the sum
                    .group_by("counted_form")
                 )
+        
+        if year is not None:
+            occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
+

    #print(str(occs_q.all()))
    occ_sums = occs_q.all()
@@ -134,13 +140,17 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):

    # £TODO  make it NodeNgram instead NodeNodeNgram ! and rebase :/
    #        (idem ti_ranking)
-    bulk_insert(
-        NodeNodeNgram,
-        ('node1_id' , 'node2_id', 'ngram_id', 'score'),
-        ((the_id, corpus.id,  res[0], res[1]) for res in occ_sums)
-    )

-    return the_id
+    if interactiv is False :
+        bulk_insert(
+            NodeNodeNgram,
+            ('node1_id' , 'node2_id', 'ngram_id', 'score'),
+            ((the_id, corpus.id,  res[0], res[1]) for res in occ_sums)
+        )
+
+        return the_id
+    else :
+        return [(res[0], res[1]) for res in occ_sums]


 def compute_ti_ranking(corpus,

--- a/gargantext/util/toolchain/ngram_coocs.py
+++ b/gargantext/util/toolchain/ngram_coocs.py
@@ -20,6 +20,7 @@ def compute_coocs(  corpus,
                    stoplist_id     = None,
                    start           = None,
                    end             = None,
+                    year            = None,
                    symmetry_filter = False,
                    diagonal_filter = True):
    """
@@ -97,14 +98,21 @@ def compute_coocs(  corpus,
        WHERE
            n.typename  = {nodetype_id}
        AND n.parent_id = {corpus_id}
+            """.format( nodetype_id = NODETYPES.index('DOCUMENT')
+                  , corpus_id=corpus.id
+                  )
+    if year :
+        cooc_filter_sql += """
+        AND n.hyperdata -> 'publication_year' = '{year}'
+            """.format( year=str(year))
+
+    cooc_filter_sql += """
        GROUP BY 1,2
        --    ==
        -- GROUP BY ngA, ngB
        )
-        """.format( nodetype_id = NODETYPES.index('DOCUMENT')
-                  , corpus_id=corpus.id
-                  )
-
+        """
+    
    # 3) taking the cooccurrences of ngram x2
    ngram_filter_A_sql += """
        -- STEP 1: X axis of the matrix

--- a/graph/cooccurrences.py
+++ b/graph/cooccurrences.py
@@ -230,6 +230,7 @@ def countCooccurrences( corpus_id=None      , cooc_id=None
        session.commit()

        #data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
-        #return data
+    else:
+        return cooc

    return(coocNode.id, cooc)
--- a/install/notebook/gargantext_notebook.py
+++ b/install/notebook/gargantext_notebook.py
@@ -25,7 +25,7 @@ from django.http import Http404
 # Import those to be available by notebook user
 from langdetect import detect as detect_lang
 from gargantext.models import UserNode, User
-
+import functools

 class NotebookError(Exception):
    pass
@@ -40,8 +40,11 @@ def documents(corpus_id):
 #import seaborn as sns
 import pandas as pd

+def countByField(docs, field):
+    return list(Counter([doc.hyperdata[field] for doc in docs]).items())
+
 def chart(docs, field):
-    year_publis = list(Counter([doc.hyperdata[field] for doc in docs]).items())
+    year_publis = countByField(docs, field)
    frame0 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'])
    frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
    return frame1

--- a/notebooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
--- a/notebooks/AdvancedTutorial.ipynb
+++ b/notebooks/AdvancedTutorial.ipynb