Commit 4f20bdb3 authored by delanoe

[FACTO/FIX] Refactor graphs: remove all the tests we made and clean up the code.

parent 49631d60
...@@ -11,12 +11,10 @@ import datetime ...@@ -11,12 +11,10 @@ import datetime
def do_cooc(corpus=None def do_cooc(corpus=None
, field1='ngrams', field2='ngrams' , field1='ngrams', field2='ngrams'
, mainList_id=None, stopList_id=None, groupList_id=None , mainList_id=None, groupList_id=None
, coocNode_id=None , coocNode_id=None
, cvalue_id=None
, n_min=1, n_max=None
, start=None, end=None , start=None, end=None
, limit=1000 , n_min=1, n_max=None , limit=1000
, isMonopartite=True , isMonopartite=True
, threshold = 3): , threshold = 3):
''' '''
...@@ -24,9 +22,8 @@ def do_cooc(corpus=None ...@@ -24,9 +22,8 @@ def do_cooc(corpus=None
For the moment list of paramters are not supported because, lists need to For the moment list of paramters are not supported because, lists need to
be merged before. be merged before.
corpus :: Corpus corpus :: Corpus
cvalue_id :: Int
mainList_id :: Int mainList_id :: Int
stopList_id :: Int
groupList_id :: Int groupList_id :: Int
For the moment, start and end are simple, only year is implemented yet For the moment, start and end are simple, only year is implemented yet
...@@ -42,10 +39,13 @@ def do_cooc(corpus=None ...@@ -42,10 +39,13 @@ def do_cooc(corpus=None
# Get node # Get node
if not coocNode_id: if not coocNode_id:
coocNode_id0 = (session.query(Node.id).filter(Node.typename == "COOCCURRENCES" coocNode_id0 = ( session.query( Node.id )
, Node.name == "GRAPH EXPLORER" .filter( Node.typename == "COOCCURRENCES"
, Node.parent_id == corpus.id , Node.name == "GRAPH EXPLORER"
).first()) , Node.parent_id == corpus.id
)
.first()
)
if not coocNode_id: if not coocNode_id:
coocNode = corpus.add_child( coocNode = corpus.add_child(
typename = "COOCCURRENCES", typename = "COOCCURRENCES",
...@@ -76,8 +76,8 @@ def do_cooc(corpus=None ...@@ -76,8 +76,8 @@ def do_cooc(corpus=None
# #
# node_cooc.hyperdata = hyperdata # node_cooc.hyperdata = hyperdata
# #
# For tests only # For tests only : delete previous cooccurrences
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==coocNode_id).delete() session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == coocNode_id ).delete()
session.commit() session.commit()
...@@ -85,37 +85,57 @@ def do_cooc(corpus=None ...@@ -85,37 +85,57 @@ def do_cooc(corpus=None
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score') cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score') #cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
#print([n for n in test_query])
if isMonopartite : if isMonopartite :
NodeNgramY = aliased(NodeNgram) NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score) cooc_query = (session.query( NodeNgramX.ngram_id
.join(Node, Node.id == NodeNgramX.node_id) , NodeNgramY.ngram_id
.join(NodeNgramY, NodeNgramY.node_id == Node.id) , cooc_score
.filter(Node.parent_id==corpus.id, Node.typename=="DOCUMENT") )
) .join( Node
, Node.id == NodeNgramX.node_id
)
.join( NodeNgramY
, NodeNgramY.node_id == Node.id
)
.filter( Node.parent_id==corpus.id
, Node.typename=="DOCUMENT"
)
)
else : else :
NodeNgramY = aliased(NodeNgram) NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score) cooc_query = (session.query( NodeHyperdataNgram.ngram_id
.join(Node, Node.id == NodeHyperdataNgram.node_id) , NodeNgramY.ngram_id
.join(NodeNgramY, NodeNgramY.node_id == Node.id) , cooc_score
.join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id) )
.filter(Node.parent_id == corpus.id, Node.typename == "DOCUMENT") .join( Node
.filter(Hyperdata.name == field1) , Node.id == NodeHyperdataNgram.node_id
) )
#print(cooc_query) .join( NodeNgramY
, NodeNgramY.node_id == Node.id
)
.join( Hyperdata
, Hyperdata.id == NodeHyperdataNgram.hyperdata_id
)
.filter( Node.parent_id == corpus.id
, Node.typename == "DOCUMENT"
)
.filter( Hyperdata.name == field1 )
)
# Size of the ngrams between n_min and n_max # Size of the ngrams between n_min and n_max
if n_min is not None or n_max is not None: if n_min is not None or n_max is not None:
if isMonopartite: if isMonopartite:
NgramX = aliased(Ngram) NgramX = aliased(Ngram)
cooc_query = cooc_query.join(NgramX, NgramX.id == NodeNgramX.ngram_id) cooc_query = cooc_query.join ( NgramX
, NgramX.id == NodeNgramX.ngram_id
)
NgramY = aliased(Ngram) NgramY = aliased(Ngram)
cooc_query = (cooc_query cooc_query = cooc_query.join ( NgramY
.join(NgramY, NgramY.id == NodeNgramY.ngram_id) , NgramY.id == NodeNgramY.ngram_id
) )
if n_min is not None: if n_min is not None:
cooc_query = (cooc_query cooc_query = (cooc_query
...@@ -173,42 +193,14 @@ def do_cooc(corpus=None ...@@ -173,42 +193,14 @@ def do_cooc(corpus=None
cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id) cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)
cooc_query = cooc_query.order_by(desc('cooc_score')) cooc_query = cooc_query.order_by(desc('cooc_score'))
# END of the query
matrix = WeightedMatrix(cooc_query) matrix = WeightedMatrix(cooc_query)
#print(matrix)
# Select according some scores # Select according some scores
if cvalue_id is not None :
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cvalue_list = UnweightedList(session.query(NodeNodeNgram.ngram_id)
.filter(NodeNodeNgram.nodex_id == cvalue_id).all()
)
if isMonopartite: mainList = UnweightedList( mainList_id )
if mainList_id is not None : group_list = Translations ( groupList_id )
miam_list = UnweightedList(mainList_id) cooc = matrix & (mainList * group_list)
if stopList_id is not None :
stop_list = UnweightedList(stopList_id)
if groupList_id is not None :
group_list = Translations(groupList_id)
if mainList_id is not None and stopList_id is None and groupList_id is None :
cooc = matrix & miam_list
elif mainList_id is not None and stopList_id is not None and groupList_id is None :
cooc = matrix & (miam_list - stop_list)
elif mainList_id is not None and stopList_id is not None and groupList_id is not None :
print("mainList_id is not None and stopList_id is not None and groupList_id is not None")
cooc = matrix & (miam_list * group_list - stop_list)
#cooc = matrix & (miam_list - stop_list)
elif mainList_id is not None and stopList_id is None and groupList_id is not None :
cooc = matrix & (miam_list * group_list)
else :
cooc = matrix
else:
cooc = matrix
cooc.save(coocNode_id) cooc.save(coocNode_id)
return(coocNode_id) return(coocNode_id)
...@@ -29,30 +29,36 @@ def get_cooc( request=None, corpus=None ...@@ -29,30 +29,36 @@ def get_cooc( request=None, corpus=None
, distance='conditional' , distance='conditional'
, size=1000 , size=1000
, bridgeness=5 , bridgeness=5
, mainList_id = None , stopList_id = None , mainList_id = None , groupList_id = None
): ):
''' '''
get_ccoc : to compute the graph. get_ccoc : to compute the graph.
''' '''
data = {} data = {}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
print("Cooccurrences do not exist yet, creating it.")
if stopList_id == None :
stopList_id = (session.query(Node.id).filter(Node.typename == "STOPLIST",
Node.parent_id == corpus.id).first())
if stopList_id == None :
raise ValueError("STOPLIST node needed for mainlist creation")
if mainList_id == None :
stopList_id = (session.query(Node.id).filter(
Node.typename == "STOPLIST",
Node.parent_id == corpus.id
).first())
if not mainList_id == None :
raise ValueError("STOPLIST node needed for mainlist creation")
if mainList_id == None :
mainList_id = ( session.query ( Node.id )
.filter( Node.typename == "MAINLIST"
, Node.parent_id == corpus.id
)
.first()
)
if mainList_id == None :
raise ValueError("MAINLIST node needed for cooccurrences")
if groupList_id == None :
groupList_id = ( session.query ( Node.id )
.filter( Node.typename == "GROUPLIST"
, Node.parent_id == corpus.id
)
.first()
)
if groupList_id == None :
raise ValueError("GROUPLIST node needed for cooccurrences")
# compute_cooc needs group, fields etc. # compute_cooc needs group, fields etc.
...@@ -72,16 +78,18 @@ def get_cooc( request=None, corpus=None ...@@ -72,16 +78,18 @@ def get_cooc( request=None, corpus=None
corpus = session.query(Node).filter(Node.id==corpus_id).first() corpus = session.query(Node).filter(Node.id==corpus_id).first()
cooc_id = do_cooc( corpus=corpus cooc_id = do_cooc( corpus=corpus
#, field1="ngrams", field2="ngrams" #, field1="ngrams", field2="ngrams"
, mainList_id=mainList_id, stopList_id=stopList_id , mainList_id=int(mainList_id[0]), groupList_id=int(groupList_id[0])
#, group_id=group_id #, isMonopartite=True
#, isMonopartite=True , start=start , end =end
, start=start , end =end , threshold = threshold #, limit=size
, threshold = threshold #, limit=size )
)
G, partition, ids, weight = do_distance(cooc_id, field1="ngrams", field2="ngrams" G, partition, ids, weight = do_distance ( cooc_id
, isMonopartite=True, distance=distance) , field1="ngrams", field2="ngrams"
, isMonopartite=True
, distance=distance
)
if type == "node_link": if type == "node_link":
nodesB_dict = {} nodesB_dict = {}
for node_id in G.nodes(): for node_id in G.nodes():
......
...@@ -8,9 +8,9 @@ from graphExplorer.functions import get_cooc ...@@ -8,9 +8,9 @@ from graphExplorer.functions import get_cooc
# TODO check authentication # TODO check authentication
class Graph(APIView): class Graph(APIView):
#authentication_classes = (SessionAuthentication, BasicAuthentication) '''
REST part for graphs.
#@requires_auth '''
def get(self, request, project_id, corpus_id): def get(self, request, project_id, corpus_id):
''' '''
Graph.get :: Get graph data as REST api. Graph.get :: Get graph data as REST api.
...@@ -23,8 +23,8 @@ class Graph(APIView): ...@@ -23,8 +23,8 @@ class Graph(APIView):
field1 = str(request.GET.get ('field1' , 'ngrams' )) field1 = str(request.GET.get ('field1' , 'ngrams' ))
field2 = str(request.GET.get ('field2' , 'ngrams' )) field2 = str(request.GET.get ('field2' , 'ngrams' ))
start = request.GET.get ('start' , None ) start = request.GET.get ('start' , None )
end = request.GET.get ('end' , None ) end = request.GET.get ('end' , None )
threshold = int(request.GET.get ('threshold' , 1 )) threshold = int(request.GET.get ('threshold' , 1 ))
bridgeness = int(request.GET.get ('bridgeness', -1 )) bridgeness = int(request.GET.get ('bridgeness', -1 ))
...@@ -43,17 +43,17 @@ class Graph(APIView): ...@@ -43,17 +43,17 @@ class Graph(APIView):
if field2 in accepted_field2 : if field2 in accepted_field2 :
if start is not None and end is not None : if start is not None and end is not None :
data = get_cooc( corpus=corpus data = get_cooc( corpus=corpus
#, field1=field1 , field2=field2 #, field1=field1 , field2=field2
, start=start , end=end , start=start , end=end
, threshold =threshold , distance=distance , threshold =threshold , distance=distance
) )
else: else:
data = get_cooc( corpus = corpus data = get_cooc( corpus = corpus
#, field1=field1, field2=field2 #, field1=field1, field2=field2
, threshold = threshold , threshold = threshold
, distance = distance , distance = distance
, bridgeness = bridgeness , bridgeness = bridgeness
) )
if format_ == 'json': if format_ == 'json':
return JsonHttpResponse(data) return JsonHttpResponse(data)
else: else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment