Commit 4afb0468 authored by delanoe's avatar delanoe

[FEAT] Graph explorer, options required.

parent f6e65087
......@@ -293,3 +293,14 @@ RULE_NPN = "{<JJ.*>*<NN.*>+((<P|IN> <DT>? <JJ.*>* <NN.*>+ <JJ.*>*)|(<JJ.*>))*
RULE_TINA = "^((VBD,|VBG,|VBN,|CD.?,|JJ.?,|\?,){0,2}?(N.?.?,|\?,)+?(CD.,)??)\
+?((PREP.?|DET.?,|IN.?,|CC.?,|\?,)((VBD,|VBG,|VBN,|CD.?,|JJ.?,|\?\
,){0,2}?(N.?.?,|\?,)+?)+?)*?$"
# ------------------------------------------------------------------------------
# Graph constraints to compute the graph:
# Modes: live graph generation, graph asynchronously computed or errors detected
# here are the maximum size of corpus and maplist required to compute the graph
graph_constraints = {'corpus' : 400
,'mapList': 50
}
Module Graph Explorer: from text to graph.
Module Graph Explorer: from text to graph
=========================================
Maintainer: If you see bugs, please report to team@gargantext.org
0) All urls.py of the Graph Explorer
1) Main view of the graph explorer: views.py
2) Data are retrieved as REST: rest.py
3) Graph is generated (graph.py) through different steps
a) check the constraints (graph_constraints) in gargantext/constants.py
b) Cooccurences are computed (in live or asynchronously): cooccurrences.py
c) Thresold and distances : distances.py
d) clustering: louvain.py
c) links between communities: bridgeness.py
4) Additional features:
a) intersection of graphs: intersection.py
......@@ -10,6 +10,7 @@ from graph.distances import clusterByDistances
from graph.bridgeness import filterByBridgeness
from gargantext.util.scheduling import scheduled
from gargantext.constants import graph_constraints
from datetime import datetime
......@@ -42,15 +43,33 @@ def get_graph( request=None , corpus=None
before_cooc = datetime.now()
if cooc_id == None:
# case of Cooccurrences have not been computed already
if cooc_id == None:
# case of mapList not big enough
# ==============================
# if we do not have any mapList_id already
if mapList_id is None:
mapList_id = session.query(Node.id).filter(Node.typename == "MAPLIST").first()[0]
mapList_size = session.query(NodeNgram).filter(NodeNgram.node_id == mapList_id)
if mapList_size.count() < graph_constraints['mapList']:
# Do not compute the graph if mapList is not big enough
return {'nodes':[], 'links':[]}
# case of corpus not big enough
# ==============================
corpus_size_query = (session.query(Node)
.filter(Node.typename=="DOCUMENT")
.filter(Node.parent_id == corpus.id)
)
# filter by date if any start date
# --------------------------------
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
date_start = datetime.strptime (str(start), "%Y-%m-%d")
......@@ -65,6 +84,8 @@ def get_graph( request=None , corpus=None
)
# filter by date if any end date
# --------------------------------
if end is not None:
date_end = datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
......@@ -77,8 +98,13 @@ def get_graph( request=None , corpus=None
.filter( End.key == 'publication_date')
.filter( End.value_utc <= date_end_utc )
)
if corpus_size_query.count() > 400:
# Finally test if the size of the corpora is big enough
# --------------------------------
if corpus_size_query.count() > graph_constraints['corpus']:
# Then compute cooc asynchronously with celery
scheduled(countCooccurrences)( corpus_id=corpus.id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
......@@ -87,9 +113,12 @@ def get_graph( request=None , corpus=None
, save_on_db = True
#, limit=size
)
return {'nodes':[], 'links':[1]} # Dic trick to inform user that graph is computed asynchronously
# Dic hack to inform user that graph is computed asynchronously
# (Impossible graph: no nodes with one link)
return {'nodes':[], 'links':[1]}
else:
# If graph_constraints are ok then compute the graph in live
cooc_matrix = countCooccurrences( corpus_id=corpus.id
#, field1="ngrams", field2="ngrams"
, start=start , end =end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment