Commit 4a5a0cd2 authored by delanoe's avatar delanoe

Merge remote-tracking branch 'origin/romain-testing' into testing-merge

parents 47398dcf b5ff7a7f
#!/bin/bash
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/srv/gargantext_lib/taggers/nlpserver/TurboParser/deps/local/lib:"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/srv/gargantext_lib/taggers/nlpserver:/srv/gargantext_lib/taggers/nlpserver/TurboParser/deps/local/lib:"
if [[ ! "$VIRTUAL_ENV" ]]
then
......
......@@ -27,7 +27,6 @@ def is_stop_word(ngram, stop_words=None):
# , "(.*)(\.)(.*)" trop fort (enlève les sigles !)
, "(.*)(\,)(.*)"
, "(.*)(< ?/?p ?>)(.*)" # marques de paragraphes
, "(.*)(study|elsevier)(.*)"
, "(.*)\b(xx|xi|xv)\b(.*)"
, "(.*)(result)(.*)"
, "(.*)(year|année|nombre|moitié)(.*)"
......@@ -37,6 +36,87 @@ def is_stop_word(ngram, stop_words=None):
, "(.*)(terme)(.*)"
, "(.*)(différent)(.*)"
, "(.*)(travers)(.*)"
# academic stamps
, ".*\belsevier\b.*"
, ".*\bwiley\b.*)"
, ".*\bspringer\b.*"
, ".*university press\b.*"
, ".*\bstudy\b.*"
# academic terms when alone ~~> usually not informative
, "hypothes[ie]s$"
, "analys[ie]s$"
, "bas[ie]s$"
, "online$"
, "importance$"
, "uses?$"
, "cases?$"
, "effects?$"
, "times?$"
, "methods?$"
, "types?$"
, "evidences?$"
, "findings?$"
, "relations?$"
, "terms?$"
, "procedures?$"
, "factors?$"
, "reports?$"
, "changes?$"
, "facts?$"
, "others?$"
, "applications?$"
, "periods?$"
, "investigations?$"
, "orders?$"
, "forms?$"
, "conditions?$"
, "situations?$"
, "papers?$"
, "relationships?$"
, "values?$"
, "areas?$"
, "techniques?$"
, "means?$"
, "conclusions?$"
, "comparisons?$"
, "parts?$"
, "amounts?$"
, "aims?$"
, "lacks?$"
, "issues?$"
, "ways?$"
, "ranges?$"
, "models?$"
, "articles?$"
, "series?$"
, "totals?$"
, "influences?$"
, "journals?$"
, "rules?$"
, "persons?$"
, "abstracts?$"
, "(?:book)? reviews?$"
, "process(?:es)?$"
, "approach(?:es)?$"
, "theor(?:y|ies)?$"
, "methodolog(?:y|ies)?$"
, "similarit(?:y|ies)?$"
, "possibilit(?:y|ies)?$"
, "stud(?:y|ies)?$"
# non-thematic or non-NP expressions
, "none$"
, "other(?: hand)?$"
, "whereas$"
, "usually$"
, "and$"
# , "vol$"
, "eds?$"
, "ltd$"
, "copyright$"
, "e-?mails?$"
, ".*="
, "=.*"
, "further(?:more)?$"
, "(.*)(:|\|)(.*)"
] :
compiled_regexes.append(compile(regex))
......
......@@ -85,8 +85,8 @@ class CSVLists(APIView):
# import the csv
try:
new_lists = import_ngramlists(csv_file)
print("===============================!!!")
print(new_lists)
print("======new_lists=========================!!!")
# print(new_lists) # very long
del csv_file
# merge the new_lists onto those of the target corpus
......
......@@ -38,7 +38,6 @@ def countCooccurrences( corpus_id=None , cooc_id=None
limit :: Int
'''
# FIXME remove the lines below after factorization of parameters
parameters = dict()
parameters['field1'] = field1
......@@ -68,14 +67,20 @@ def countCooccurrences( corpus_id=None , cooc_id=None
cooc_id = coocNode.id
else :
cooc_id = int(cooc_id[0])
# when cooc_id preexisted, but we want to continue (reset = True)
# (to give new contents to this cooc_id)
elif reset:
print("GRAPH #%s ... Counting new cooccurrences data." % cooc_id)
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == cooc_id ).delete()
session.commit()
# when cooc_id preexisted and we just want to load it (reset = False)
else:
print("GRAPH #%s ... Loading cooccurrences computed already." % cooc_id)
cooc = session.query( NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id, NodeNgramNgram.weight ).filter( NodeNgramNgram.node_id == cooc_id ).all()
return(int(cooc_id),WeightedMatrix(cooc))
if reset == True :
session.query( NodeNgramNgram ).filter( NodeNgramNgram.node_id == cooc_id ).delete()
session.commit()
NodeNgramX = aliased(NodeNgram)
......@@ -202,29 +207,29 @@ def countCooccurrences( corpus_id=None , cooc_id=None
#cooc_query = cooc_query.order_by(desc('cooc_score'))
matrix = WeightedMatrix(cooc_query)
print("GRAPH #%s Filtering the matrix with Map and Group Lists." % cooc_id)
cooc = filterMatrix(matrix, mapList_id, groupList_id)
parameters['MapList_id'] = str(mapList_id)
parameters['GroupList_id'] = str(groupList_id)
# TODO factorize savings on db
if save_on_db:
# Saving the cooccurrences
cooc.save(cooc_id)
print("GRAPH #%s ... Node Cooccurrence Matrix saved" % cooc_id)
# Saving the parameters
print("GRAPH #%s ... Parameters saved in Node." % cooc_id)
coocNode = session.query(Node).filter(Node.id==cooc_id).first()
coocNode.hyperdata["parameters"] = dict()
coocNode.hyperdata["parameters"] = parameters
coocNode.save_hyperdata()
session.commit()
#data = cooc2graph(coocNode.id, cooc, distance=distance, bridgeness=bridgeness)
#return data
return(coocNode.id, cooc)
......@@ -51,7 +51,7 @@ def compute_graph( corpus_id=None , cooc_id=None
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
, save_on_db = True , reset = reset
)
print("GRAPH #%d ... Cooccurrences computed." % (cooc_id))
......@@ -73,13 +73,13 @@ def compute_graph( corpus_id=None , cooc_id=None
node.hyperdata[distance] = dict()
node.hyperdata[distance][bridgeness] = data
node.hyperdata[distance]["nodes"] = len(G.nodes())
node.hyperdata[distance]["edges"] = len(G.edges())
node.save_hyperdata()
session.commit()
print("GRAPH #%d ... Notify by email owner of the graph." % cooc_id)
corpus = session.query(Node).filter(Node.id==corpus_id).first()
notify_owner(corpus, cooc_id, distance, bridgeness)
......@@ -99,25 +99,25 @@ def get_graph( request=None , corpus=None
'''
Get_graph : main steps:
0) Check the parameters
get_graph :: GraphParameters -> Either (Dic Nodes Links) (Dic State Length)
where type Length = Int
get_graph first checks the parameters and return either graph data or a dict with
state "type" with an integer to indicate the size of the parameter
get_graph first checks the parameters and return either graph data or a dict with
state "type" with an integer to indicate the size of the parameter
(maybe we could add a String in that step to factor and give here the error message)
1) compute_graph (see function above)
2) return graph
'''
overwrite_node_contents = False
# Case of graph has been computed already
if cooc_id is not None:
print("GRAPH#%d ... Loading data already computed." % int(cooc_id))
node = session.query(Node).filter(Node.id == cooc_id).first()
# Structure of the Node.hyperdata[distance][bridgeness]
# All parameters (but distance and bridgeness)
# are in Node.hyperdata["parameters"]
......@@ -130,6 +130,25 @@ def get_graph( request=None , corpus=None
if graph.get(str(bridgeness), None) is not None:
return graph[str(bridgeness)]
# new graph: we give it an empty node with new id and status
elif saveOnly:
# NB: the node is created here already (rather than later in countCooccurrences)
# to guarantee a unique reference id for the saveOnly graph (async generation)
new_node = corpus.add_child(
typename = "COOCCURRENCES",
name = "GRAPH (in corpus %s)" % corpus.id
)
session.add(new_node)
session.commit()
cooc_id = new_node.id
cooc_name = new_node.name
cooc_date = new_node.date
# and the empty content will need redoing by countCooccurrences
overwrite_node_contents = True
print("GRAPH #%d ... Created new empty data node for saveOnly" % int(cooc_id))
# Case of graph has not been computed already
# First, check the parameters
......@@ -198,10 +217,14 @@ def get_graph( request=None , corpus=None
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
, save_on_db = True , reset=overwrite_node_contents
#, limit=size
)
return {"state" : "saveOnly"}
return {"state" : "saveOnly",
"target_id" : cooc_id,
"target_name": cooc_name,
"target_date": cooc_date}
elif corpus_size > graph_constraints['corpusMax']:
# Then compute cooc asynchronously with celery
......@@ -211,10 +234,10 @@ def get_graph( request=None , corpus=None
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
, save_on_db = True , reset=overwrite_node_contents
#, limit=size
)
# Dict to inform user that corpus maximum is reached
# Dict to inform user that corpus maximum is reached
# then graph is computed asynchronously
return {"state" : "corpusMax", "length" : corpus_size}
......@@ -230,7 +253,7 @@ def get_graph( request=None , corpus=None
, mapList_id=mapList_id , groupList_id=groupList_id
, isMonopartite=True , threshold = threshold
, distance=distance , bridgeness=bridgeness
, save_on_db = True
, save_on_db = True , reset=overwrite_node_contents
#, limit=size
)
......
......@@ -18,6 +18,11 @@ class Graph(APIView):
Get all the parameters first
graph?field1=ngrams&field2=ngrams&
graph?field1=ngrams&field2=ngrams&start=''&end=''
NB save new graph mode
(option saveOnly=True without a cooc_id)
can return the new cooc id in the json
before counting + filling data in async
'''
if not request.user.is_authenticated():
......@@ -56,7 +61,6 @@ class Graph(APIView):
type_ = str(request.GET.get ('type' , 'node_link' ))
distance = str(request.GET.get ('distance' , 'conditional'))
# Get default map List of corpus
if mapList_id == 0 :
mapList_id = ( session.query ( Node.id )
......@@ -100,7 +104,7 @@ class Graph(APIView):
, field1=field1 , field2=field2
, mapList_id = mapList_id , groupList_id = groupList_id
, start=start , end=end
, threshold =threshold
, threshold =threshold
, distance=distance , bridgeness=bridgeness
, saveOnly=saveOnly
)
......@@ -127,10 +131,12 @@ class Graph(APIView):
# async data case
link = "http://%s/projects/%d/corpora/%d/myGraphs" % (request.get_host(), corpus.parent_id, corpus.id)
return JsonHttpResponse({
'msg': '''Your graph is saved:
'id': data["target_id"],
'name': data["target_name"],
'date': data["target_date"],
'msg': '''Your graph is being saved:
%s
''' % format_html(link),
''' % format_html(link)
}, status=200)
elif data["state"] == "corpusMin":
......
......@@ -78,6 +78,8 @@ def myGraphs(request, project_id, corpus_id):
#coocs_count[cooc.id] = len(cooc_nodes)
coocs_count[cooc.id] = len([cooc_node for cooc_node in cooc_nodes if cooc_node[1] > 1])
print("coocs_count a posteriori", coocs_count)
return render(
template_name = 'pages/corpora/myGraphs.html',
request = request,
......
......@@ -200,8 +200,34 @@ function getRessources(){
//// POST TO API
//// PUT AND PATCH TO API
function deleteOne(url){
function deleteOne(url, thatButton){
// we just show wait image before ajax
var $thatButton = $(thatButton)
var alreadyWaiting = $thatButton.has($('.wait-img-active')).length
if (! alreadyWaiting) {
var previousButtonContent = $thatButton.html()
var availableWidth = $thatButton.width()
var $myWaitImg = $('#wait-img').clone()
$myWaitImg.attr("id", null)
.attr("class","wait-img-active pull-right")
.width(availableWidth)
.css("display", "block") ;
$thatButton.append($myWaitImg)
}
else {
// uncomment if user should stop clicking ;)
// $thatButton.addClass("btn-danger")
// uncomment to prevent a 2nd ajax
return false
}
// now the real ajax
$.ajax({
url: '/api'+url,
type: 'delete',
......@@ -306,7 +332,7 @@ $(document).on('change', 'input[type=checkbox]', function() {
$(document).on("click","#delete", function(){
var selected = selectedUrls();
selected.forEach(function(url) {
deleteOne(url);
deleteOne(url, this);
});
//window.location.reload();
});
......@@ -336,7 +362,7 @@ $(document).on("click","#recalculate", function(){
// UNIQUE DELETION
$(document).on("click", ".delete", function() {
var url = $( this ).data("url");
deleteOne(url);
deleteOne(url, this);
//window.location.reload();
});
......
This diff is collapsed.
......@@ -56,15 +56,15 @@
<div class="clearfix"></div>
</div>
<div class="center">
<button type="submit" class="btn btn-primary btn-rounded">Login</button>
</div>
<div>
<p>
<center>
By submitting I accept the terms of uses [DOC1] [DOC2].
</center>
</p>
<div class="checkbox">
<label>
<input id="terms-accept" type="checkbox" value="" onclick="enableAuth(this)">
I accept the terms of use [DOC1] [DOC2].
</input>
</label>
</div>
<button id="login-button" type="submit" class="btn btn-primary btn-rounded" disabled>Login</button>
</div>
</form>
</div>
</div>
......@@ -79,6 +79,16 @@
<!-- Bootstrap -->
<script src="{% static "lib/bootstrap/3.2.0/bootstrap.min.js" %}"></script>
<!-- checkbox => submit -->
<script type="text/javascript">
var okButton = document.getElementById('login-button')

// Checkbox handler: the login button is usable only while `box` is ticked.
function enableAuth(box) {
  if (box.checked) {
    okButton.disabled = false
  }
  else {
    okButton.disabled = true
  }
}
</script>
</body>
......
......@@ -67,7 +67,8 @@
</li>
{% endif %}
</ul>
<ul class="nav pull-right">
<ul class="nav navbar-nav pull-right">
<li class="dropdown">
<a href="#" role="button" class="dropdown-toggle navbar-text" data-toggle="dropdown" title="That is your username">
<i class="icon-user"></i>
......
......@@ -18,6 +18,18 @@
.ui-autocomplete .ui-menu-item {
font-size:x-small;
}
/* for wait gif in buttons */
.wait-img-active {
margin-left: .5em;
}
/* hover red like btn_danger */
.btn.delete:hover {
color: #fff;
background-color: #c9302c;
border-color: #ac2925;
}
</style>
{% endblock %}
......@@ -97,6 +109,7 @@
<!--here loading projectlist from GET /projects-->
</div>
<img id="wait-img" width="90%" style="display:none" src="{% static "img/ajax-loader.gif"%}"></img>
</div>
......@@ -116,10 +129,10 @@
<!-- DELETE PROJECT -->
<button type="button" class="btn btn-default delete pull-right" data-url="{url}" >
<span class="glyphicon glyphicon-trash pull-right" aria-hidden="true"></span>
<span class="glyphicon glyphicon-trash" aria-hidden="true"></span>
</button>
<!-- EDIT PROJECT-->
<!-- EDIT PROJECT-->
<button class="btn btn-default edit pull-right" data-id="{id}" data-url="{url}" data-toggle="collapse">
<span class="glyphicon glyphicon-pencil" aria-hidden="true"></span>
</button>
......
......@@ -23,6 +23,11 @@
margin-top: .3em;
color: grey ;
}
ul.inside-popover {
padding: .5em ;
list-style-type: none ;
}
</style>
{% endblock %}
......@@ -111,9 +116,9 @@
<button type="button" class="btn btn-default {% if not state.complete %}hidden{% endif %}" data-container="body" data-toggle="popover" data-placement="bottom" data-trigger="focus"
data-content="
<ul>
<ul class=&quot;inside-popover&quot;>
<li
onclick=&quot;updateCorpus({{corpus.id}})&quot;>
onclick=&quot;updateCorpus(event, {{corpus.id}})&quot;>
<a href='#'>Recalculate ngram metrics</a> <br/> (can take a little while)
</li>
</ul>
......@@ -125,19 +130,9 @@
<!-- TODO: allow delete not only when state.complete but also when state.error -->
<button type="button" class="btn btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content="
<ul>
<ul id=&quot;{{corpus.id}}_trash_msg&quot; class=&quot;inside-popover&quot;>
<li
onclick=&quot;
trashedIds[{{corpus.id}}] = true ;
garganrest.nodes.delete(
{{corpus.id}},
function(){
$('#corpus_'+{{corpus.id}}).remove()
delete trashedIds[{{corpus.id}}]
}
);
$(this).parent().parent().remove();
&quot;>
onclick=&quot;deleteCorpus(event, {{corpus.id}})&quot;>
<a href='#'>Delete this</a>
</li>
</ul>
......@@ -798,7 +793,36 @@
});
}
function updateCorpus(corpusId) {
function deleteCorpus(e, corpusId) {
  // the trigger is an <a href='#'>: stop the jump back to the top of the page
  e.preventDefault()

  // mark this corpus as having a deletion in progress
  trashedIds[corpusId] = true ;

  // replace the corpus status cell with a loader wheel
  var loaderCell = document.getElementById("corpus_"+corpusId+"_status")
  loaderCell.innerHTML = '<img width="10%" src="{% static "img/ajax-loader.gif"%}"></img>'

  // replace the popover menu with a waiting notice
  var popoverMenu = document.getElementById(corpusId+"_trash_msg")
  popoverMenu.innerHTML = '<h5>Deleting corpus, please wait</h5>'

  // callback handed to the REST helper: drops the corpus row, clears the
  // pending flag and removes any popover still displayed
  function onCorpusDeleted() {
    $('#corpus_'+corpusId).remove()
    delete trashedIds[corpusId]
    $('.popover').remove();
  }

  // REST call
  garganrest.nodes.delete(corpusId, onCorpusDeleted);
}
function updateCorpus(e, corpusId) {
// prevents scroll back to top of page
e.preventDefault()
// show 'waiting'
var statusDiv = document.getElementById("corpus_"+corpusId+"_status")
var previousStatus = statusDiv.innerHTML
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment