Commit 29f9c8a0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TUTO] Philomemy Notebook created with main functions to explorer the subject...

[TUTO] Philomemy Notebook created with main functions to explorer the subject in collaboration with David.
parent 11255619
......@@ -23,7 +23,7 @@ from datetime import datetime
def t():
return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
def compute_occs(corpus, overwrite_id = None, groupings_id = None, year=None, start=None, end=None, interactiv=False):
"""
Calculates sum of occs per ngram (or per mainform if groups) within corpus
(used as info in the ngrams table view)
......@@ -61,6 +61,8 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
.group_by(NodeNgram.ngram_id)
)
if year is not None:
occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
# difficult case: with groups
# ------------
......@@ -108,6 +110,10 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# for the sum
.group_by("counted_form")
)
if year is not None:
occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
#print(str(occs_q.all()))
occ_sums = occs_q.all()
......@@ -134,13 +140,17 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# £TODO make it NodeNgram instead NodeNodeNgram ! and rebase :/
# (idem ti_ranking)
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id
if interactiv is False :
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id
else :
return [(res[0], res[1]) for res in occ_sums]
def compute_ti_ranking(corpus,
......
......@@ -20,6 +20,7 @@ def compute_coocs( corpus,
stoplist_id = None,
start = None,
end = None,
year = None,
symmetry_filter = False,
diagonal_filter = True):
"""
......@@ -97,14 +98,21 @@ def compute_coocs( corpus,
WHERE
n.typename = {nodetype_id}
AND n.parent_id = {corpus_id}
""".format( nodetype_id = NODETYPES.index('DOCUMENT')
, corpus_id=corpus.id
)
if year :
cooc_filter_sql += """
AND n.hyperdata -> 'publication_year' = '{year}'
""".format( year=str(year))
cooc_filter_sql += """
GROUP BY 1,2
-- ==
-- GROUP BY ngA, ngB
)
""".format( nodetype_id = NODETYPES.index('DOCUMENT')
, corpus_id=corpus.id
)
"""
# 3) taking the cooccurrences of ngram x2
ngram_filter_A_sql += """
-- STEP 1: X axis of the matrix
......
......@@ -230,6 +230,7 @@ def countCooccurrences( corpus_id=None , cooc_id=None
session.commit()
#data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
#return data
else:
return cooc
return(coocNode.id, cooc)
......@@ -25,7 +25,7 @@ from django.http import Http404
# Import those to be available by notebook user
from langdetect import detect as detect_lang
from gargantext.models import UserNode, User
import functools
class NotebookError(Exception):
pass
......@@ -40,8 +40,11 @@ def documents(corpus_id):
#import seaborn as sns
import pandas as pd
def countByField(docs, field):
return list(Counter([doc.hyperdata[field] for doc in docs]).items())
def chart(docs, field):
year_publis = list(Counter([doc.hyperdata[field] for doc in docs]).items())
year_publis = countByField(docs, field)
frame0 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'])
frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
return frame1
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment