Commit dbedae3c authored by delanoe's avatar delanoe

[FACTO/FIX] Facto graphs, removing all the tests we made, cleaning code.

parent c874d397
......@@ -11,12 +11,10 @@ import datetime
def do_cooc(corpus=None
, field1='ngrams', field2='ngrams'
, mainList_id=None, stopList_id=None, groupList_id=None
, mainList_id=None, groupList_id=None
, coocNode_id=None
, cvalue_id=None
, n_min=1, n_max=None
, start=None, end=None
, limit=1000
, n_min=1, n_max=None , limit=1000
, isMonopartite=True
, threshold = 3):
'''
......@@ -24,9 +22,8 @@ def do_cooc(corpus=None
For the moment, lists of parameters are not supported because the lists need to
be merged beforehand.
corpus :: Corpus
cvalue_id :: Int
mainList_id :: Int
stopList_id :: Int
groupList_id :: Int
For the moment, start and end are simple: only the year is implemented so far
......@@ -42,10 +39,13 @@ def do_cooc(corpus=None
# Get node
if not coocNode_id:
coocNode_id0 = (session.query(Node.id).filter(Node.typename == "COOCCURRENCES"
, Node.name == "GRAPH EXPLORER"
, Node.parent_id == corpus.id
).first())
coocNode_id0 = ( session.query( Node.id )
.filter( Node.typename == "COOCCURRENCES"
, Node.name == "GRAPH EXPLORER"
, Node.parent_id == corpus.id
)
.first()
)
if not coocNode_id:
coocNode = corpus.add_child(
typename = "COOCCURRENCES",
......@@ -76,8 +76,8 @@ def do_cooc(corpus=None
#
# node_cooc.hyperdata = hyperdata
#
# For tests only
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==coocNode_id).delete()
# For tests only : delete previous cooccurrences
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.commit()
......@@ -85,37 +85,57 @@ def do_cooc(corpus=None
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
#print([n for n in test_query])
if isMonopartite :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeNgramX.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id)
.filter(Node.parent_id==corpus.id, Node.typename=="DOCUMENT")
)
cooc_query = (session.query( NodeNgramX.ngram_id
, NodeNgramY.ngram_id
, cooc_score
)
.join( Node
, Node.id == NodeNgramX.node_id
)
.join( NodeNgramY
, NodeNgramY.node_id == Node.id
)
.filter( Node.parent_id==corpus.id
, Node.typename=="DOCUMENT"
)
)
else :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeHyperdataNgram.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id)
.join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
.filter(Node.parent_id == corpus.id, Node.typename == "DOCUMENT")
.filter(Hyperdata.name == field1)
)
#print(cooc_query)
cooc_query = (session.query( NodeHyperdataNgram.ngram_id
, NodeNgramY.ngram_id
, cooc_score
)
.join( Node
, Node.id == NodeHyperdataNgram.node_id
)
.join( NodeNgramY
, NodeNgramY.node_id == Node.id
)
.join( Hyperdata
, Hyperdata.id == NodeHyperdataNgram.hyperdata_id
)
.filter( Node.parent_id == corpus.id
, Node.typename == "DOCUMENT"
)
.filter( Hyperdata.name == field1 )
)
# Size of the ngrams between n_min and n_max
if n_min is not None or n_max is not None:
if isMonopartite:
NgramX = aliased(Ngram)
cooc_query = cooc_query.join(NgramX, NgramX.id == NodeNgramX.ngram_id)
cooc_query = cooc_query.join ( NgramX
, NgramX.id == NodeNgramX.ngram_id
)
NgramY = aliased(Ngram)
cooc_query = (cooc_query
.join(NgramY, NgramY.id == NodeNgramY.ngram_id)
)
cooc_query = cooc_query.join ( NgramY
, NgramY.id == NodeNgramY.ngram_id
)
if n_min is not None:
cooc_query = (cooc_query
......@@ -173,42 +193,14 @@ def do_cooc(corpus=None
cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)
cooc_query = cooc_query.order_by(desc('cooc_score'))
# END of the query
matrix = WeightedMatrix(cooc_query)
#print(matrix)
# Select according to some scores
if cvalue_id is not None :
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cvalue_list = UnweightedList(session.query(NodeNodeNgram.ngram_id)
.filter(NodeNodeNgram.nodex_id == cvalue_id).all()
)
if isMonopartite:
if mainList_id is not None :
miam_list = UnweightedList(mainList_id)
if stopList_id is not None :
stop_list = UnweightedList(stopList_id)
if groupList_id is not None :
group_list = Translations(groupList_id)
if mainList_id is not None and stopList_id is None and groupList_id is None :
cooc = matrix & miam_list
elif mainList_id is not None and stopList_id is not None and groupList_id is None :
cooc = matrix & (miam_list - stop_list)
elif mainList_id is not None and stopList_id is not None and groupList_id is not None :
print("mainList_id is not None and stopList_id is not None and groupList_id is not None")
cooc = matrix & (miam_list * group_list - stop_list)
#cooc = matrix & (miam_list - stop_list)
elif mainList_id is not None and stopList_id is None and groupList_id is not None :
cooc = matrix & (miam_list * group_list)
else :
cooc = matrix
else:
cooc = matrix
mainList = UnweightedList( mainList_id )
group_list = Translations ( groupList_id )
cooc = matrix & (mainList * group_list)
cooc.save(coocNode_id)
return(coocNode_id)
......@@ -29,30 +29,36 @@ def get_cooc( request=None, corpus=None
, distance='conditional'
, size=1000
, bridgeness=5
, mainList_id = None , stopList_id = None
, mainList_id = None , groupList_id = None
):
'''
get_cooc : to compute the graph.
'''
data = {}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
print("Cooccurrences do not exist yet, creating it.")
if stopList_id == None :
stopList_id = (session.query(Node.id).filter(Node.typename == "STOPLIST",
Node.parent_id == corpus.id).first())
if stopList_id == None :
raise ValueError("STOPLIST node needed for mainlist creation")
if mainList_id == None :
stopList_id = (session.query(Node.id).filter(
Node.typename == "STOPLIST",
Node.parent_id == corpus.id
).first())
if not mainList_id == None :
raise ValueError("STOPLIST node needed for mainlist creation")
if mainList_id == None :
mainList_id = ( session.query ( Node.id )
.filter( Node.typename == "MAINLIST"
, Node.parent_id == corpus.id
)
.first()
)
if mainList_id == None :
raise ValueError("MAINLIST node needed for cooccurrences")
if groupList_id == None :
groupList_id = ( session.query ( Node.id )
.filter( Node.typename == "GROUPLIST"
, Node.parent_id == corpus.id
)
.first()
)
if groupList_id == None :
raise ValueError("GROUPLIST node needed for cooccurrences")
# compute_cooc needs group, fields etc.
......@@ -72,16 +78,18 @@ def get_cooc( request=None, corpus=None
corpus = session.query(Node).filter(Node.id==corpus_id).first()
cooc_id = do_cooc( corpus=corpus
#, field1="ngrams", field2="ngrams"
, mainList_id=mainList_id, stopList_id=stopList_id
#, group_id=group_id
#, isMonopartite=True
, start=start , end =end
, threshold = threshold #, limit=size
)
#, field1="ngrams", field2="ngrams"
, mainList_id=int(mainList_id[0]), groupList_id=int(groupList_id[0])
#, isMonopartite=True
, start=start , end =end
, threshold = threshold #, limit=size
)
G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams"
, isMonopartite=True, distance=distance)
G, partition, ids, weight = do_distance ( cooc_id
, field1="ngrams", field2="ngrams"
, isMonopartite=True
, distance=distance
)
if type == "node_link":
nodesB_dict = {}
for node_id in G.nodes():
......
......@@ -8,9 +8,9 @@ from graphExplorer.functions import get_cooc
# TODO check authentication
class Graph(APIView):
#authentication_classes = (SessionAuthentication, BasicAuthentication)
#@requires_auth
'''
REST part for graphs.
'''
def get(self, request, project_id, corpus_id):
'''
Graph.get :: Get graph data as REST api.
......@@ -23,8 +23,8 @@ class Graph(APIView):
field1 = str(request.GET.get ('field1' , 'ngrams' ))
field2 = str(request.GET.get ('field2' , 'ngrams' ))
start = request.GET.get ('start' , None )
end = request.GET.get ('end' , None )
start = request.GET.get ('start' , None )
end = request.GET.get ('end' , None )
threshold = int(request.GET.get ('threshold' , 1 ))
bridgeness = int(request.GET.get ('bridgeness', -1 ))
......@@ -43,17 +43,17 @@ class Graph(APIView):
if field2 in accepted_field2 :
if start is not None and end is not None :
data = get_cooc( corpus=corpus
#, field1=field1 , field2=field2
#, field1=field1 , field2=field2
, start=start , end=end
, threshold =threshold , distance=distance
)
else:
data = get_cooc( corpus = corpus
#, field1=field1, field2=field2
, threshold = threshold
, distance = distance
, bridgeness = bridgeness
)
#, field1=field1, field2=field2
, threshold = threshold
, distance = distance
, bridgeness = bridgeness
)
if format_ == 'json':
return JsonHttpResponse(data)
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment