Commit 78eb26d4 authored by delanoe's avatar delanoe

Merge remote-tracking branch 'origin/romain-testing' into testing-merge

parents 4bccaa96 5a2f7efb
......@@ -151,12 +151,15 @@
transition: all 0.25s linear;
}
.selection {
/* this was used for the p or div that *contained* a selection */
/*.selection {
color: #aaa;
}
}*/
/* this is used for the selected text itself */
::selection {
color: black;
background-color: rgba(0, 0, 0, 0.4);
background-color: #aaa;
}
.noselection {
......
......@@ -97,6 +97,21 @@
// +propToRead+" ("+cache[propToRead]+")")
params[key] = cache[propToRead]
}
else if (typeof val == "object" && val["fromCacheIfElse"]) {
var propToReadIf = val["fromCacheIfElse"][0]
var propToReadElse = val["fromCacheIfElse"][1]
// console.log("reading from cache: response data property " +
// "if:"+propToReadIf+" ("+cache[propToReadIf]+")"+
// " else:"+propToReadElse+" ("+cache[propToReadElse]+")")
var valueIf = cache[propToReadIf]
var valueElse = cache[propToReadElse]
if (valueIf && valueIf != 'null' && valueIf != '') {
params[key] = valueIf
}
else {
params[key] = valueElse
}
}
}
// Now we run the call
......@@ -149,8 +164,8 @@
// -------------------------------------------------------------------------
// debug
// console.log("==> $rootScope <==")
// console.log($rootScope)
console.log("==> $rootScope <==")
console.log($rootScope)
});
})(window);
......@@ -50,6 +50,9 @@
/*
* Universal text selection
*
* "universal" <=> (Chrome, Firefox, IE, Safari, Opera...)
* cf. quirksmode.org/dom/range_intro.html
*/
function getSelected() {
if (window.getSelection) {
......@@ -67,19 +70,15 @@
}
return false;
}
// we only need one singleton at a time
var selection = getSelected();
/*
 * Highlight helper for mouse selections.
 *
 * Marks every ".text-panel" element with the "selection" class when the
 * given text is non-blank and the menu element is not flagged as opened;
 * otherwise the class is removed.
 *
 * @param {string} text - the currently selected text
 */
function toggleSelectionHighlight(text) {
  // the menu state is only queried when there is some non-blank text
  var hasText = text.trim() !== "";
  var shouldHighlight = hasText && !$element.hasClass('menu-is-opened');

  var panels = $(".text-panel");
  if (shouldHighlight) {
    panels.addClass("selection");
    return;
  }
  panels.removeClass("selection");
}
// £TODO extend "double click selection" on hyphen words
// and reduce it on apostrophe ones (except firefox)
// cf. stackoverflow.com/a/39005881/2489184
// jsfiddle.net/avvhsruu/
// we only need one singleton at a time
// (<=> is only created once per doc, but value of annotation changes)
var selectionObj = getSelected();
/*
* Dynamically construct the selection menu scope
......@@ -107,10 +106,11 @@
$scope.selection_text = angular.copy(annotation);
// debug
// console.log("toggleMenu with context:", context) ;
// console.log("toggleMenu with annotation: '" + JSON.stringify(annotation) +"'") ;
// console.log("toggleMenu with \$scope.selection_text: '" + JSON.stringify($scope.selection_text) +"'") ;
if (angular.isObject(annotation) && !$element.hasClass('menu-is-opened')) {
// existing ngram
var ngramId = annotation.uuid
var mainformId = annotation.group
......@@ -210,7 +210,7 @@
}
// "add" actions for non-existing ngram
else if (annotation.trim() !== "" && !$element.hasClass('menu-is-opened')) {
else if (annotation.trim() !== "" && ! context) {
var newNgramText = annotation.trim()
// new ngram (first call creates it, then proceeds like the previous case for the list)
$scope.menuItems.push({
......@@ -219,9 +219,9 @@
'crudCalls':[
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':newNgramText, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':stoplist_id, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':stoplist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
]
}) ;
$scope.menuItems.push({
......@@ -230,9 +230,9 @@
'crudCalls':[
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':newNgramText, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
]
}) ;
$scope.menuItems.push({
......@@ -241,23 +241,27 @@
'crudCalls':[
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':newNgramText, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCache': 'id'} } },
'params' : {'listId':mainlist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':maplist_id, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':maplist_id, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
]
}) ;
// show the menu
$element.fadeIn(50);
$element.addClass('menu-is-opened');
// console.warn("FADE IN menu", $element)
}
else {
// console.warn("=> else")
// close the menu
$scope.menuItems = [];
$element.fadeOut(50);
$element.removeClass('menu-is-opened');
// console.warn("FADE OUT menu", $element)
}
});
});
......@@ -283,26 +287,15 @@
});
/*
* Finish positioning the menu then display the menu
* Toggle the menu when clicking on an existing ngram or a free selection
*/
$(".text-container").mouseup(function(e){
$(".text-container").unbind("mousemove", positionMenu);
$rootScope.$emit("positionAnnotationMenu", e.pageX, e.pageY);
toggleSelectionHighlight(selection.toString().trim());
toggleMenu(null, selection.toString().trim());
});
/*
* Toggle the menu when clicking on an existing ngram keyword
*
* £TODO test: apparently this is never used ?
* (superseded by TextSelectionController.onClick)
*/
$(".text-container").delegate(':not("#selection")', "click", function(e) {
// if ($(e.target).hasClass("keyword-inline")) return;
positionMenu(e);
toggleSelectionHighlight(selection.toString().trim());
toggleMenu(null, selection.toString().trim());
// console.warn("calling toggleMenu from *mouseup*")
toggleMenu(null, selectionObj.toString().trim());
});
$rootScope.$on("positionAnnotationMenu", positionElement);
......@@ -322,9 +315,11 @@
$rootScope.makeChainedCalls(0, todoCrudCalls, $rootScope.refresh)
// syntax: (step_to_run_first, list_of_steps, lastCallback)
// hide the highlighted text and the menu element
$(".text-panel").removeClass("selection");
// hide the menu element
$element.fadeOut(100);
// the highlighted text hides itself when deselected
// (thx to browser and css ::selection)
};
}
]);
......@@ -407,10 +402,68 @@
var template = templateBegin + templateEnd;
var templateBeginRegexp = "<span ng-controller='TextSelectionController' ng-click='onClick\(\$event\)' class='keyword-inline'>";
var startPattern = "\\b((?:"+templateBeginRegexp+")*";
var startPattern = "(\\W|^)((?:"+templateBeginRegexp+")*";
var middlePattern = "(?:<\/span>)*\\s(?:"+templateBeginRegexp+")*";
var middlePattern = " ";
var endPattern = "(?:<\/span>)*)\\b";
var endPattern = "(?:<\/span>)*)(?=\\W|$)";
// --------------------------------------------------------------------------------
// Remarks about /\b/ and /(\W|^)/ and /(?=\W|$)/ etc.
//
// -----------------
// 1) we need to match entire words only
//
// ex: "the manifestation manifest".match(/manifest/g)
//
// => not good because it would highlight the substr
// inside 2nd word "the manifestation manifest"
// ^^^^^^^^ ^^^^^^^^
//
// so in this situation one usually uses \b (boundary)
//
// ex: "the manifestation manifest".match(/\bmanifest\b/g)
//
// ok: now only 3rd word is highlighted:
// "the manifestation manifest"
// ^^^^^^^^
// -----------------
//
// 2) but we can't really use boundary \b when we have accented chars
// ex:
// no accent: "la moitié".match(/la/) => ["la"]
// "la moitié".match(/\bla\b/) => ["la"]
//
// but "la moitié".match(/moitié/) => ["moitié"]
// "la moitié".match(/\bmoitié\b/) => [] <~~~ problem !
//
// cf. stackoverflow.com/questions/23458872/javascript-regex-word-boundary-b-issue
// stackoverflow.com/questions/2881445/utf-8-word-boundary-regex-in-javascript
// -----------------
//
// 3) normally the typical replacement for \b would be:
// - at start of string: /(?<=\W|^)/ (lookbehind boundary)
// - at end of string: /(?=\W|$)/ (lookahead boundary)
//
// ...
// but lookbehind not supported in js !! (sept 2016)
// cf. stackoverflow.com/questions/30118815
// -----------------
//
// 4) so in conclusion we will use this strategy:
//
// - at start of string: /(\W|^)/ (boundary, may capture ' ' or '' into $1)
// - for the html+word: /<aa>bla</aa>/ (same pattern as before)
// - at end of string: /(?=\W|$)/ (lookahead boundary)
// - in replacement: $1+anchor
//
// => This way if $1 was ' ' (or other non word char),
// then we re-add the char that we are replacing,
// and if $1 was '' (beginning of str)
// then we re-add nothing ;) )
//
// ex: "la moitié".replace(/(\s|^)moitié(?=\s|$)/, '$1hello') => "la hello"
// "moitié la".replace(/(\s|^)moitié(?=\s|$)/, '$1hello') => "hello la"
// ---------------------------------------------------------------------------------
// hash of flags filled in first pass loop : (== did annotation i match ?)
var isDisplayedIntraText = {};
......@@ -453,8 +506,8 @@
// var myPattern = new RegExp("\\b"+escapeRegExp(annotation.text)+"\\b", 'igm');
// previously:
var words = annotation.text.split(" ").map(escapeRegExp);
var myPattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');
var myPattern = new RegExp(startPattern + words.join(middlePattern) + endPattern, 'gmi');
// -------------------------------------------
// replace in text: matched annots by anchors
......@@ -472,6 +525,7 @@
// £dbgcount here unnecessary nbMatches (can go straight to ICI)
var matches = eltLongtext.match(myPattern)
var nbMatches = matches ? eltLongtext.match(myPattern).length : 0
if (nbMatches > 0) {
k += nbMatches ;
......@@ -480,7 +534,7 @@
l ++ ;
// ------------------------------------------------------------
// ICI we update each time
textMapping[eltId] = eltLongtext.replace(myPattern, myAnchor);
textMapping[eltId] = eltLongtext.replace(myPattern, "$1"+myAnchor);
// ex longtext -- "Background Few previous studies have
// examined non-wealth-based inequalities etc"
......
......@@ -90,17 +90,20 @@
* MainApiAddNgramHttpService: Create and index a new ngram
* ===========================
* route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
* ------
*
* NB it also checks if ngram exists (returns the preexisting id)
* and if it has a mainform/group (via 'testgroup' option)
* (useful if we add it to a list afterwards)
*
*/
http.factory('MainApiAddNgramHttpService', function($resource) {
return $resource(
// adding explicit "http://" b/c this is a cross-origin request
'http://' + window.GARG_ROOT_URL
+ "/api/ngrams?text=:ngramStr&corpus=:corpusId",
+ "/api/ngrams?text=:ngramStr&corpus=:corpusId&testgroup",
{
ngramStr: '@ngramStr',
corpusId: '@corpusId'
corpusId: '@corpusId',
},
{
put: {
......
......@@ -141,9 +141,9 @@
crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
];
break;
......@@ -151,9 +151,9 @@
crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
];
break;
......@@ -161,11 +161,11 @@
crudCallsToMake = [
{'service': MainApiAddNgramHttpService, 'action': 'put',
'params' : {'ngramStr':value, corpusId: $rootScope.corpusId},
'dataPropertiesToCache': ['id'] },
'dataPropertiesToCache': ['id', 'group'] },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCache': 'id'} } },
'params' : {'listId':$rootScope.listIds.MAINLIST, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } },
{'service': MainApiChangeNgramHttpService, 'action': 'put',
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCache': 'id'} } }
'params' : {'listId':tgtListId, 'ngramIdList': {'fromCacheIfElse': ['group','id']} } }
];
break;
}
......
......@@ -19,6 +19,7 @@ procedure:
from gargantext.models import Ngram, Node, NodeNgram
from gargantext.util.db import session, bulk_insert
from gargantext.util.db import bulk_insert_ifnotexists # £TODO debug
from sqlalchemy import distinct
from re import findall, IGNORECASE
......@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@param keys: the hyperdata fields to index
"""
# check the ngrams we won't process (those that were already indexed)
indexed_ngrams_subquery = (session
.query(distinct(NodeNgram.ngram_id))
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == 'DOCUMENT')
.subquery()
)
# retrieve the ngrams from our list, filtering out the already indexed ones
# retrieve *all* the ngrams from our list
# (even if some relations may be already indexed
# b/c they were perhaps not extracted in all docs
# => we'll use already_indexed later)
todo_ngrams = (session
.query(Ngram)
.filter(Ngram.id.in_(ngram_ids))
.filter(~ Ngram.id.in_(indexed_ngrams_subquery))
.all()
)
......@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
else:
node_ngram_to_write[doc.id][ngram.id] += n_occs
# debug
# print("new node_ngrams before filter:", node_ngram_to_write)
# check the relations we won't insert (those that were already indexed)
# NB costly but currently impossible with bulk_insert_ifnotexists
# b/c double uniquekey
already_indexed = (session
.query(NodeNgram.node_id, NodeNgram.ngram_id)
.join(Node, Node.id == NodeNgram.node_id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == 'DOCUMENT')
.all()
)
filter_out = {(nd_id,ng_id) for (nd_id,ng_id) in already_indexed}
# POSSIBLE update those that are filtered out if wei_previous != wei
# integrate all at the end
my_new_rows = []
add_new_row = my_new_rows.append
for doc_id in node_ngram_to_write:
for ngram_id in node_ngram_to_write[doc_id]:
wei = node_ngram_to_write[doc_id][ngram_id]
add_new_row([doc_id, ngram_id, wei])
if (doc_id, ngram_id) not in filter_out:
wei = node_ngram_to_write[doc_id][ngram_id]
add_new_row([doc_id, ngram_id, wei])
del node_ngram_to_write
# debug
# print("new node_ngrams after filter:", my_new_rows)
bulk_insert(
table = NodeNgram,
fields = ('node_id', 'ngram_id', 'weight'),
data = my_new_rows
)
# bulk_insert_ifnotexists(
# model = NodeNgram,
# uniquekey = ('node_id','ngram_id'), <= currently impossible
# fields = ('node_id', 'ngram_id', 'weight'),
# data = my_new_rows
# )
n_added = len(my_new_rows)
print("index_new_ngrams: added %i new NodeNgram rows" % n_added)
......
......@@ -2,8 +2,8 @@ from gargantext.util.http import ValidationException, APIView \
, get_parameters, JsonHttpResponse\
, HttpResponse
from gargantext.util.db import session, func
from gargantext.util.db_cache import cache
from gargantext.models import Node, Ngram, NodeNgram
from gargantext.util.db_cache import cache
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
from sqlalchemy.orm import aliased
from re import findall
......@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
"""
Used for analytics
------------------
Get ngram listing + counts in a given scope
"""
# parameters retrieval and validation
......@@ -83,24 +83,30 @@ class ApiNgrams(APIView):
1 - checks user authentication before any changes
2 - adds the ngram to Ngram table in DB
2 - checks if the ngram already exists in the Ngram table in DB
if yes returns ngram_id and optionally mainform_id
otherwise continues
3 - adds the ngram to Ngram table in DB
3 - (if corpus param is present)
4 - (if corpus param is present)
adds the ngram doc counts to NodeNgram table in DB
(aka "index the ngram" through the docs of the corpus)
4 - returns json with:
'msg' => a success msg
5 - returns json with:
'msg' => a success msg
'text' => the initial text content
'term' => the normalized text content
'id' => the new ngram_id
'count' => the number of docs with the ngram in the corpus
(if corpus param is present)
'group' => the mainform_id if applicable
possible inline parameters
--------------------------
@param text=<ngram_string> [required]
@param corpus=<CORPUS_ID> [optional]
@param testgroup (true if present) [optional, requires corpus]
"""
# 1 - check user authentication
......@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
It requires a "text" parameter,\
for instance /api/ngrams?text=hydrometallurgy')
if ('testgroup' in params) and (not ('corpus' in params)):
raise ValidationException("'testgroup' param requires 'corpus' param")
# if we have a 'corpus' param (to do the indexing)...
do_indexation = False
if 'corpus' in params:
......@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
try:
log_msg = ""
ngram_id = None
mainform_id = None
preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()
if preexisting is not None:
ngram_id = preexisting.id
log_msg += "ngram already existed (id %i)\n" % ngram_id
# in the context of a corpus we can also check if has mainform
# (useful if we add the ngram to a list afterwards)
if 'testgroup' in params:
groupings_id = (session.query(Node.id)
.filter(Node.parent_id == corpus_id)
.filter(Node.typename == 'GROUPLIST')
.first()
)
had_mainform = (session.query(NodeNgramNgram.ngram1_id)
.filter(NodeNgramNgram.node_id == groupings_id)
.filter(NodeNgramNgram.ngram2_id == preexisting.id)
.first()
)
if had_mainform:
mainform_id = had_mainform[0]
log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
else:
log_msg += "ngram was not in any group for this corpus"
else:
# 2 - insert into Ngrams
new_ngram = Ngram(terms=ngram_str, n=ngram_size)
......@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
'text': original_text,
'term': ngram_str,
'id' : ngram_id,
'group' : mainform_id,
'count': n_added if do_indexation else 'no corpus provided for indexation'
}, 200)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment