Commit e42e2b8d authored by delanoe's avatar delanoe

Merge remote-tracking branch 'origin/romain-testing' into testing-merge

parents 9f252d49 1697b72c
...@@ -151,12 +151,15 @@ ...@@ -151,12 +151,15 @@
transition: all 0.25s linear; transition: all 0.25s linear;
} }
.selection { /* this was used for the p or div that *contained* a selection */
/*.selection {
color: #aaa; color: #aaa;
} }*/
/* this is used for the selected text itself */
::selection { ::selection {
color: black; color: black;
background-color: rgba(0, 0, 0, 0.4); background-color: #aaa;
} }
.noselection { .noselection {
......
...@@ -97,6 +97,21 @@ ...@@ -97,6 +97,21 @@
// +propToRead+" ("+cache[propToRead]+")") // +propToRead+" ("+cache[propToRead]+")")
params[key] = cache[propToRead] params[key] = cache[propToRead]
} }
else if (typeof val == "object" && val["fromCacheIfElse"]) {
var propToReadIf = val["fromCacheIfElse"][0]
var propToReadElse = val["fromCacheIfElse"][1]
// console.log("reading from cache: response data property " +
// "if:"+propToReadIf+" ("+cache[propToReadIf]+")"+
// " else:"+propToReadElse+" ("+cache[propToReadElse]+")")
var valueIf = cache[propToReadIf]
var valueElse = cache[propToReadElse]
if (valueIf && valueIf != 'null' && valueIf != '') {
params[key] = valueIf
}
else {
params[key] = valueElse
}
}
} }
// Now we run the call // Now we run the call
...@@ -149,8 +164,8 @@ ...@@ -149,8 +164,8 @@
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// debug // debug
// console.log("==> $rootScope <==") console.log("==> $rootScope <==")
// console.log($rootScope) console.log($rootScope)
}); });
})(window); })(window);
This diff is collapsed.
...@@ -90,17 +90,20 @@ ...@@ -90,17 +90,20 @@
* MainApiAddNgramHttpService: Create and index a new ngram * MainApiAddNgramHttpService: Create and index a new ngram
* =========================== * ===========================
* route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id * route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
* ------ *
* NB it also checks if ngram exists (returns the preexisting id)
* and if it has a mainform/group (via 'testgroup' option)
* (useful if we add it to a list afterwards)
* *
*/ */
http.factory('MainApiAddNgramHttpService', function($resource) { http.factory('MainApiAddNgramHttpService', function($resource) {
return $resource( return $resource(
// adding explicit "http://" b/c this is a cross origin request // adding explicit "http://" b/c this is a cross origin request
'http://' + window.GARG_ROOT_URL 'http://' + window.GARG_ROOT_URL
+ "/api/ngrams?text=:ngramStr&corpus=:corpusId", + "/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup",
{ {
ngramStr: '@ngramStr', ngramStr: '@ngramStr',
corpusId: '@corpusId' corpusId: '@corpusId',
}, },
{ {
put: { put: {
......
...@@ -141,9 +141,9 @@ ...@@ -141,9 +141,9 @@
crudCallsToMake = [ crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put', {'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId}, 'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] }, 'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put', {'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } } 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
]; ];
break; break;
...@@ -151,9 +151,9 @@ ...@@ -151,9 +151,9 @@
crudCallsToMake = [ crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put', {'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId}, 'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] }, 'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put', {'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } } 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
]; ];
break; break;
...@@ -161,11 +161,11 @@ ...@@ -161,11 +161,11 @@
crudCallsToMake = [ crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put', {'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId}, 'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] }, 'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put', {'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCache': 'id'} } }, 'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
{'service': MainApiChangeNgramHttpService, 'action': 'put', {'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } } 'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
]; ];
break; break;
} }
......
...@@ -19,6 +19,7 @@ procedure: ...@@ -19,6 +19,7 @@ procedure:
from gargantext.models import Ngram, Node, NodeNgram from gargantext.models import Ngram, Node, NodeNgram
from gargantext.util.db import session, bulk_insert from gargantext.util.db import session, bulk_insert
from gargantext.util.db import bulk_insert_ifnotexists # £TODO debug
from sqlalchemy import distinct from sqlalchemy import distinct
from re import findall, IGNORECASE from re import findall, IGNORECASE
...@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )): ...@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@param keys: the hyperdata fields to index @param keys: the hyperdata fields to index
""" """
# check the ngrams we won't process (those that were already indexed) # retrieve *all* the ngrams from our list
indexed_ngrams_subquery = (session # (even if some relations may be already indexed
.query(distinct(NodeNgram.ngram_id)) # b/c they were perhaps not extracted in all docs
.join(Node, Node.id == NodeNgram.node_id) # => we'll use already_indexed later)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == 'DOCUMENT')
.subquery()
)
# retrieve the ngrams from our list, filtering out the already indexed ones
todo_ngrams = (session todo_ngrams = (session
.query(Ngram) .query(Ngram)
.filter(Ngram.id.in_(ngram_ids)) .filter(Ngram.id.in_(ngram_ids))
.filter(~ Ngram.id.in_(indexed_ngrams_subquery))
.all() .all()
) )
...@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )): ...@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
else: else:
node_ngram_to_write[doc.id][ngram.id] += n_occs node_ngram_to_write[doc.id][ngram.id] += n_occs
# debug
# print("new node_ngrams before filter:", node_ngram_to_write)
# check the relations we won't insert (those that were already indexed)
# NB costly but currently impossible with bulk_insert_ifnotexists
# b/c double uniquekey
already_indexed = (session
.query(NodeNgram.node_id, NodeNgram.ngram_id)
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == 'DOCUMENT')
.all()
)
filter_out = {(nd_id,ng_id) for (nd_id,ng_id) in already_indexed}
# POSSIBLE update those that are filtered out if wei_previous != wei
# integrate all at the end # integrate all at the end
my_new_rows = [] my_new_rows = []
add_new_row = my_new_rows.append add_new_row = my_new_rows.append
for doc_id in node_ngram_to_write: for doc_id in node_ngram_to_write:
for ngram_id in node_ngram_to_write[doc_id]: for ngram_id in node_ngram_to_write[doc_id]:
wei = node_ngram_to_write[doc_id][ngram_id] if (doc_id, ngram_id) not in filter_out:
add_new_row([doc_id, ngram_id, wei]) wei = node_ngram_to_write[doc_id][ngram_id]
add_new_row([doc_id, ngram_id, wei])
del node_ngram_to_write del node_ngram_to_write
# debug
# print("new node_ngrams after filter:", my_new_rows)
bulk_insert( bulk_insert(
table = NodeNgram, table = NodeNgram,
fields = ('node_id', 'ngram_id', 'weight'), fields = ('node_id', 'ngram_id', 'weight'),
data = my_new_rows data = my_new_rows
) )
# bulk_insert_ifnotexists(
# model = NodeNgram,
# uniquekey = ('node_id','ngram_id'), <= currently impossible
# fields = ('node_id', 'ngram_id', 'weight'),
# data = my_new_rows
# )
n_added = len(my_new_rows) n_added = len(my_new_rows)
print("index_new_ngrams: added %i new NodeNgram rows" % n_added) print("index_new_ngrams: added %i new NodeNgram rows" % n_added)
......
...@@ -2,8 +2,8 @@ from gargantext.util.http import ValidationException, APIView \ ...@@ -2,8 +2,8 @@ from gargantext.util.http import ValidationException, APIView \
, get_parameters, JsonHttpResponse\ , get_parameters, JsonHttpResponse\
, HttpResponse , HttpResponse
from gargantext.util.db import session, func from gargantext.util.db import session, func
from gargantext.util.db_cache import cache from gargantext.util.db_cache import cache
from gargantext.models import Node, Ngram, NodeNgram from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
from re import findall from re import findall
...@@ -21,7 +21,7 @@ class ApiNgrams(APIView): ...@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
""" """
Used for analytics Used for analytics
------------------ ------------------
Get ngram listing + counts in a given scope Get ngram listing + counts in a given scope
""" """
# parameters retrieval and validation # parameters retrieval and validation
...@@ -83,24 +83,30 @@ class ApiNgrams(APIView): ...@@ -83,24 +83,30 @@ class ApiNgrams(APIView):
1 - checks user authentication before any changes 1 - checks user authentication before any changes
2 - adds the ngram to Ngram table in DB 2 - checks if ngram exists in Ngram table in DB
if yes returns ngram_id and optionally mainform_id
otherwise continues
3 - adds the ngram to Ngram table in DB
3 - (if corpus param is present) 4 - (if corpus param is present)
adds the ngram doc counts to NodeNgram table in DB adds the ngram doc counts to NodeNgram table in DB
(aka "index the ngram" through the docs of the corpus) (aka "index the ngram" through the docs of the corpus)
4 - returns json with: 5 - returns json with:
'msg' => a success msg 'msg' => a success msg
'text' => the initial text content 'text' => the initial text content
'term' => the normalized text content 'term' => the normalized text content
'id' => the new ngram_id 'id' => the new ngram_id
'count' => the number of docs with the ngram in the corpus 'count' => the number of docs with the ngram in the corpus
(if corpus param is present) (if corpus param is present)
'group' => the mainform_id if applicable
possible inline parameters possible inline parameters
-------------------------- --------------------------
@param text=<ngram_string> [required] @param text=<ngram_string> [required]
@param corpus=<CORPUS_ID> [optional] @param corpus=<CORPUS_ID> [optional]
@param testgroup (true if present) [optional, requires corpus]
""" """
# 1 - check user authentication # 1 - check user authentication
...@@ -122,6 +128,9 @@ class ApiNgrams(APIView): ...@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
It requires a "text" parameter,\ It requires a "text" parameter,\
for instance /api/ngrams?text=hydrometallurgy') for instance /api/ngrams?text=hydrometallurgy')
if ('testgroup' in params) and (not ('corpus' in params)):
raise ValidationException("'testgroup' param requires 'corpus' param")
# if we have a 'corpus' param (to do the indexing)... # if we have a 'corpus' param (to do the indexing)...
do_indexation = False do_indexation = False
if 'corpus' in params: if 'corpus' in params:
...@@ -143,10 +152,33 @@ class ApiNgrams(APIView): ...@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
try: try:
log_msg = "" log_msg = ""
ngram_id = None ngram_id = None
mainform_id = None
preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first() preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
if preexisting is not None: if preexisting is not None:
ngram_id = preexisting.id ngram_id = preexisting.id
log_msg += "ngram already existed (id %i)\n" % ngram_id log_msg += "ngram already existed (id %i)\n" % ngram_id
# in the context of a corpus we can also check if it has a mainform
# (useful if we add it to a list afterwards)
if 'testgroup' in params:
groupings_id = (session.query(Node.id)
.filter(Node.parent_id == corpus_id)
.filter(Node.typename == 'GROUPLIST')
.first()
)
had_mainform = (session.query(NodeNgramNgram.ngram1_id)
.filter(NodeNgramNgram.node_id == groupings_id)
.filter(NodeNgramNgram.ngram2_id == preexisting.id)
.first()
)
if had_mainform:
mainform_id = had_mainform[0]
log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
else:
log_msg += "ngram was not in any group for this corpus"
else: else:
# 2 - insert into Ngrams # 2 - insert into Ngrams
new_ngram = Ngram(terms=ngram_str, n=ngram_size) new_ngram = Ngram(terms=ngram_str, n=ngram_size)
...@@ -165,6 +197,7 @@ class ApiNgrams(APIView): ...@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
'text': original_text, 'text': original_text,
'term': ngram_str, 'term': ngram_str,
'id' : ngram_id, 'id' : ngram_id,
'group' : mainform_id,
'count': n_added if do_indexation else 'no corpus provided for indexation' 'count': n_added if do_indexation else 'no corpus provided for indexation'
}, 200) }, 200)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment