Commit 78eb26d4 authored by delanoe's avatar delanoe

Merge remote-tracking branch 'origin/romain-testing' into testing-merge

parents 4bccaa96 5a2f7efb
......@@ -151,12 +151,15 @@
transition: all 0.25s linear;
}
.selection {
/* this was used for the p or div that *contained* a selection */
/*.selection {
color: #aaa;
}
}*/
/* this is used for the selected text itself */
::selection {
color: black;
background-color: rgba(0, 0, 0, 0.4);
background-color: #aaa;
}
.noselection {
......
......@@ -97,6 +97,21 @@
// +propToRead+" ("+cache[propToRead]+")")
params[key] = cache[propToRead]
}
else if (typeof val == "object" && val["fromCacheIfElse"]) {
var propToReadIf = val["fromCacheIfElse"][0]
var propToReadElse = val["fromCacheIfElse"][1]
// console.log("reading from cache: response data property " +
// "if:"+propToReadIf+" ("+cache[propToReadIf]+")"+
// " else:"+propToReadElse+" ("+cache[propToReadElse]+")")
var valueIf = cache[propToReadIf]
var valueElse = cache[propToReadElse]
if (valueIf && valueIf != 'null' && valueIf != '') {
params[key] = valueIf
}
else {
params[key] = valueElse
}
}
}
// Now we run the call
......@@ -149,8 +164,8 @@
// -------------------------------------------------------------------------
// debug
// console.log("==> $rootScope <==")
// console.log($rootScope)
console.log("==> $rootScope <==")
console.log($rootScope)
});
})(window);
This diff is collapsed.
......@@ -90,17 +90,20 @@
* MainApiAddNgramHttpService: Create and index a new ngram
* ===========================
* route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
* ------
*
* NB it also checks if ngram exists (returns the preexisting id)
* and if it has a mainform/group (via 'testgroup' option)
* (useful if we add it to a list afterwards)
*
*/
http.factory('MainApiAddNgramHttpService', function($resource) {
return $resource(
// adding explicit "http://" b/c this a cross origin request
'http://' + window.GARG_ROOT_URL
+ "/api/ngrams?text=:ngramStr&corpus=:corpusId",
+ "/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup",
{
ngramStr: '@ngramStr',
corpusId: '@corpusId'
corpusId: '@corpusId',
},
{
put: {
......
......@@ -141,9 +141,9 @@
crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
];
break;
......@@ -151,9 +151,9 @@
crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
];
break;
......@@ -161,11 +161,11 @@
crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCache': 'id'} } },
'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
];
break;
}
......
......@@ -19,6 +19,7 @@ procedure:
from gargantext.models import Ngram, Node, NodeNgram
from gargantext.util.db import session, bulk_insert
from gargantext.util.db import bulk_insert_ifnotexists # £TODO debug
from sqlalchemy import distinct
from re import findall, IGNORECASE
......@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@param keys: the hyperdata fields to index
"""
# check the ngrams we won't process (those that were already indexed)
indexed_ngrams_subquery = (session
.query(distinct(NodeNgram.ngram_id))
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == 'DOCUMENT')
.subquery()
)
# retrieve the ngrams from our list, filtering out the already indexed ones
# retrieve *all* the ngrams from our list
# (even if some relations may be already indexed
# b/c they were perhaps not extracted in all docs
# => we'll use already_indexed later)
todo_ngrams = (session
.query(Ngram)
.filter(Ngram.id.in_(ngram_ids))
.filter(~ Ngram.id.in_(indexed_ngrams_subquery))
.all()
)
......@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
else:
node_ngram_to_write[doc.id][ngram.id] += n_occs
# debug
# print("new node_ngrams before filter:", node_ngram_to_write)
# check the relations we won't insert (those that were already indexed)
# NB costly but currently impossible with bulk_insert_ifnotexists
# b/c double uniquekey
already_indexed = (session
.query(NodeNgram.node_id, NodeNgram.ngram_id)
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == 'DOCUMENT')
.all()
)
filter_out = {(nd_id,ng_id) for (nd_id,ng_id) in already_indexed}
# POSSIBLE update those that are filtered out if wei_previous != wei
# integrate all at the end
my_new_rows = []
add_new_row = my_new_rows.append
for doc_id in node_ngram_to_write:
for ngram_id in node_ngram_to_write[doc_id]:
wei = node_ngram_to_write[doc_id][ngram_id]
add_new_row([doc_id, ngram_id, wei])
if (doc_id, ngram_id) not in filter_out:
wei = node_ngram_to_write[doc_id][ngram_id]
add_new_row([doc_id, ngram_id, wei])
del node_ngram_to_write
# debug
# print("new node_ngrams after filter:", my_new_rows)
bulk_insert(
table = NodeNgram,
fields = ('node_id', 'ngram_id', 'weight'),
data = my_new_rows
)
# bulk_insert_ifnotexists(
# model = NodeNgram,
# uniquekey = ('node_id','ngram_id'), <= currently impossible
# fields = ('node_id', 'ngram_id', 'weight'),
# data = my_new_rows
# )
n_added = len(my_new_rows)
print("index_new_ngrams: added %i new NodeNgram rows" % n_added)
......
......@@ -2,8 +2,8 @@ from gargantext.util.http import ValidationException, APIView \
, get_parameters, JsonHttpResponse\
, HttpResponse
from gargantext.util.db import session, func
from gargantext.util.db_cache import cache
from gargantext.models import Node, Ngram, NodeNgram
from gargantext.util.db_cache import cache
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
from sqlalchemy.orm import aliased
from re import findall
......@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
"""
Used for analytics
------------------
Get ngram listing + counts in a given scope
"""
# parameters retrieval and validation
......@@ -83,24 +83,30 @@ class ApiNgrams(APIView):
1 - checks user authentication before any changes
2 - adds the ngram to Ngram table in DB
2 - checks if ngram to Ngram table in DB
if yes returns ngram_id and optionally mainform_id
otherwise continues
3 - adds the ngram to Ngram table in DB
3 - (if corpus param is present)
4 - (if corpus param is present)
adds the ngram doc counts to NodeNgram table in DB
(aka "index the ngram" throught the docs of the corpus)
4 - returns json with:
'msg' => a success msg
5 - returns json with:
'msg' => a success msg
'text' => the initial text content
'term' => the normalized text content
'id' => the new ngram_id
'count' => the number of docs with the ngram in the corpus
(if corpus param is present)
'group' => the mainform_id if applicable
possible inline parameters
--------------------------
@param text=<ngram_string> [required]
@param corpus=<CORPUS_ID> [optional]
@param testgroup (true if present) [optional, requires corpus]
"""
# 1 - check user authentication
......@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
It requires a "text" parameter,\
for instance /api/ngrams?text=hydrometallurgy')
if ('testgroup' in params) and (not ('corpus' in params)):
raise ValidationException("'testgroup' param requires 'corpus' param")
# if we have a 'corpus' param (to do the indexing)...
do_indexation = False
if 'corpus' in params:
......@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
try:
log_msg = ""
ngram_id = None
mainform_id = None
preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
if preexisting is not None:
ngram_id = preexisting.id
log_msg += "ngram already existed (id %i)\n" % ngram_id
# in the context of a corpus we can also check if has mainform
# (useful for)
if 'testgroup' in params:
groupings_id = (session.query(Node.id)
.filter(Node.parent_id == corpus_id)
.filter(Node.typename == 'GROUPLIST')
.first()
)
had_mainform = (session.query(NodeNgramNgram.ngram1_id)
.filter(NodeNgramNgram.node_id == groupings_id)
.filter(NodeNgramNgram.ngram2_id == preexisting.id)
.first()
)
if had_mainform:
mainform_id = had_mainform[0]
log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
else:
log_msg += "ngram was not in any group for this corpus"
else:
# 2 - insert into Ngrams
new_ngram = Ngram(terms=ngram_str, n=ngram_size)
......@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
'text': original_text,
'term': ngram_str,
'id' : ngram_id,
'group' : mainform_id,
'count': n_added if do_indexation else 'no corpus provided for indexation'
}, 200)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment