Commit 7f721cee authored by Alexandre Delanoë

[TUTO] Philomemy Notebook created with main functions to explore the subject...

[TUTO] Philomemy Notebook created with main functions to explore the subject in collaboration with David.
parent 4bda3617
...@@ -23,7 +23,7 @@ from datetime import datetime ...@@ -23,7 +23,7 @@ from datetime import datetime
def t(): def t():
return datetime.now().strftime("%Y-%m-%d_%H:%M:%S") return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
def compute_occs(corpus, overwrite_id = None, groupings_id = None,): def compute_occs(corpus, overwrite_id = None, groupings_id = None, year=None, start=None, end=None, interactiv=False):
""" """
Calculates sum of occs per ngram (or per mainform if groups) within corpus Calculates sum of occs per ngram (or per mainform if groups) within corpus
(used as info in the ngrams table view) (used as info in the ngrams table view)
...@@ -61,6 +61,8 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,): ...@@ -61,6 +61,8 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
.group_by(NodeNgram.ngram_id) .group_by(NodeNgram.ngram_id)
) )
if year is not None:
occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
# difficult case: with groups # difficult case: with groups
# ------------ # ------------
...@@ -108,6 +110,10 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,): ...@@ -108,6 +110,10 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# for the sum # for the sum
.group_by("counted_form") .group_by("counted_form")
) )
if year is not None:
occs_q = occs_q.filter(Node.hyperdata["publication_year"].astext == str(year))
#print(str(occs_q.all())) #print(str(occs_q.all()))
occ_sums = occs_q.all() occ_sums = occs_q.all()
...@@ -134,13 +140,17 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,): ...@@ -134,13 +140,17 @@ def compute_occs(corpus, overwrite_id = None, groupings_id = None,):
# £TODO make it NodeNgram instead NodeNodeNgram ! and rebase :/ # £TODO make it NodeNgram instead NodeNodeNgram ! and rebase :/
# (idem ti_ranking) # (idem ti_ranking)
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id if interactiv is False :
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id
else :
return [(res[0], res[1]) for res in occ_sums]
def compute_ti_ranking(corpus, def compute_ti_ranking(corpus,
......
...@@ -20,6 +20,7 @@ def compute_coocs( corpus, ...@@ -20,6 +20,7 @@ def compute_coocs( corpus,
stoplist_id = None, stoplist_id = None,
start = None, start = None,
end = None, end = None,
year = None,
symmetry_filter = False, symmetry_filter = False,
diagonal_filter = True): diagonal_filter = True):
""" """
...@@ -97,14 +98,21 @@ def compute_coocs( corpus, ...@@ -97,14 +98,21 @@ def compute_coocs( corpus,
WHERE WHERE
n.typename = {nodetype_id} n.typename = {nodetype_id}
AND n.parent_id = {corpus_id} AND n.parent_id = {corpus_id}
""".format( nodetype_id = NODETYPES.index('DOCUMENT')
, corpus_id=corpus.id
)
if year :
cooc_filter_sql += """
AND n.hyperdata -> 'publication_year' = '{year}'
""".format( year=str(year))
cooc_filter_sql += """
GROUP BY 1,2 GROUP BY 1,2
-- == -- ==
-- GROUP BY ngA, ngB -- GROUP BY ngA, ngB
) )
""".format( nodetype_id = NODETYPES.index('DOCUMENT') """
, corpus_id=corpus.id
)
# 3) taking the cooccurrences of ngram x2 # 3) taking the cooccurrences of ngram x2
ngram_filter_A_sql += """ ngram_filter_A_sql += """
-- STEP 1: X axis of the matrix -- STEP 1: X axis of the matrix
......
...@@ -230,6 +230,7 @@ def countCooccurrences( corpus_id=None , cooc_id=None ...@@ -230,6 +230,7 @@ def countCooccurrences( corpus_id=None , cooc_id=None
session.commit() session.commit()
#data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness) #data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
#return data else:
return cooc
return(coocNode.id, cooc) return(coocNode.id, cooc)
...@@ -25,7 +25,7 @@ from django.http import Http404 ...@@ -25,7 +25,7 @@ from django.http import Http404
# Import those to be available by notebook user # Import those to be available by notebook user
from langdetect import detect as detect_lang from langdetect import detect as detect_lang
from gargantext.models import UserNode, User from gargantext.models import UserNode, User
import functools
class NotebookError(Exception): class NotebookError(Exception):
pass pass
...@@ -40,8 +40,11 @@ def documents(corpus_id): ...@@ -40,8 +40,11 @@ def documents(corpus_id):
#import seaborn as sns #import seaborn as sns
import pandas as pd import pandas as pd
def countByField(docs, field):
return list(Counter([doc.hyperdata[field] for doc in docs]).items())
def chart(docs, field): def chart(docs, field):
year_publis = list(Counter([doc.hyperdata[field] for doc in docs]).items()) year_publis = countByField(docs, field)
frame0 = pd.DataFrame(year_publis, columns=['Date', 'DateValue']) frame0 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'])
frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date) frame1 = pd.DataFrame(year_publis, columns=['Date', 'DateValue'], index=frame0.Date)
return frame1 return frame1
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment