Commit 5c07a501 authored by delanoe

Merge branch 'refactoring-rom' into refactoring

parents 8cc92549 35c3de76
ANNOTATIONS
===========
2016-01

## Routines for manipulating ngrams within the lists

#### Overall path of a chosen action

1. angular: ngramlist.js (user input) or highlight.js (user menu controller)
2. angular: http.js configuration object, e.g. `{ 'action': 'post', 'listId': miamlist_id, .. }` (see the sketch after this list)
3. AJAX POST/DELETE
4. "local API" (=> annotations.views)
5. DB insert/delete

Note:
In Elias's annotations code there is a "local API" that relays client actions to the server.
=> the wiring is configured for angular in annotations/static/annotations/app.js, which starts its own main on the window, takes its parameters from the URL and keeps itself isolated from django
=> the upstream routes are defined for django in annotations.urls and mirrored for angular in http.js
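A minimal sketch of how steps 2 and 3 fit together, written against the routes documented further down; the module name, the 'post'/'delete' action set and the exact parameter names are assumptions, not a copy of the real http.js:
```
// Sketch only (not the project's http.js): the step-2 configuration
// object becomes the URL parameters of an Angular $resource whose
// template follows the route annotations/lists/<listId>/ngrams/<ngramId>.
var sketch = angular.module('annotationsHttpSketch', ['ngResource']);

sketch.factory('NgramHttpService', ['$resource', function ($resource) {
  return $resource('annotations/lists/:listId/ngrams/:ngramId', {}, {
    'post':   { method: 'POST'   },   // step 3: AJAX POST   (create an ngram)
    'delete': { method: 'DELETE' }    // step 3: AJAX DELETE (remove an ngram)
  });
}]);

// Step 1 -> 2, from a controller: the object literal below is the
// "configuration object" of step 2.
//   NgramHttpService.post({ 'listId': miamlist_id, 'ngramId': 'create' },
//                         { 'text': 'some term' });
```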
#### Example: the AJAX step for a deletion

`curl -XDELETE http://localhost:8000/annotations/lists/7129/ngrams/4497`

handled via annotations.views.NgramEdit.as_view()

#### Adding an ngram
```
curl -XPOST http://localhost:8000/annotations/lists/1866/ngrams/create \
     -H "Content-Type: application/json" \
     -d '{"text":"yooooooooo"}' > response_to_ngrams_create.html
```
## Client-side interaction points (GUI)

Add an ngram via the dialog box:
- controller:
  ngramlist.annotationsAppNgramList.controller('NgramInputController')
- effect:
  1. NgramHttpService.post()

Add an ngram via "select new" + menu:
- controller:
  highlight.annotationsAppHighlight.controller('TextSelectionMenuController')
  1. toggleMenu (sets action = X)
  2. onMenuClick
- effect:
  1. NgramHttpService[action] (see the sketch below)
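A compressed sketch of the second flow; only the two documented steps (toggleMenu storing the action, onMenuClick calling NgramHttpService[action]) come from the notes above, while the scope fields and parameter names are illustrative assumptions:
```
// Illustrative sketch -- the real controller is TextSelectionMenuController
// in highlight.js; assumes $scope and NgramHttpService have been injected.
// The 'pending' field and parameter names are made up here.

// 1. toggleMenu: remember which action the menu entry stands for
$scope.toggleMenu = function (listId, action) {      // action: 'post' or 'delete'
  $scope.pending = { listId: listId, verb: action };
};

// 2. onMenuClick: fire the remembered action on the selected text
$scope.onMenuClick = function (selectedText, ngramId) {
  var p = $scope.pending;
  NgramHttpService[p.verb](                          // -> POST or DELETE on annotations/lists/...
    { 'listId': p.listId, 'ngramId': ngramId || 'create' },
    { 'text': selectedText },
    function onSuccess(data) { console.log('ok', data); },
    function onError(data)   { console.error('ngram action failed', data); }
  );
};
```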
@@ -24,18 +24,21 @@
});
});
+ // FIXME: couldn't we use `lists` directly
+ //        instead of copying it into allListsSelect?
$rootScope.$watchCollection('lists', function (newValue, oldValue) {
if (newValue === undefined) return;
// reformat lists to allListsSelect
var allListsSelect = [];
- console.log($rootScope.lists)
+ // console.log($rootScope.lists)
angular.forEach($rootScope.lists, function(value, key) {
this.push({
'id': key,
'label': value
});
- // initialize activeLists with the MiamList by default
+ // initialize activeLists with the MAPLIST by default
- if (value == 'MAINLIST') {
+ if (value == 'MAPLIST') {
$rootScope.activeLists = {};
$rootScope.activeLists[key] = value;
}
@@ -45,7 +48,7 @@
$timeout(function() {
$('.selectpicker').selectpicker();
- $('.selectpicker').selectpicker('val', ['MAINLIST']);
+ $('.selectpicker').selectpicker('val', ['MAPLIST']);
});
});
......
@@ -6,14 +6,23 @@
*/
+ .MAPLIST {
+   color: black;
+   /* green */
+   background-color: rgba(60, 118, 61, .7);
+   cursor: pointer;
+ }
.MAINLIST {
  color: black;
- background-color: rgba(60, 118, 61, 0.5);
+ /* background-color: rgba(60, 118, 61, 0.5); */
+ background-color: orange;
  cursor: pointer;
}
.STOPLIST {
  color: black;
+ /* grey */
  background-color: rgba(169, 68, 66, 0.2);
  cursor: pointer;
}
......
@@ -8,6 +8,8 @@
// dataLoading = flag used to show the wait indicator
$scope.dataLoading = true ;
+ console.log("annotations.document.DocController.DocumentHttpService.get():before")
$rootScope.documentResource = DocumentHttpService.get(
{'docId': $rootScope.docId},
@@ -21,6 +23,7 @@
$rootScope.docId = data.id;
$rootScope.full_text = data.full_text;
$rootScope.abstract_text = data.abstract_text;
+ console.log("annotations.document.DocController.getannotations")
// GET the annotations
NgramListHttpService.get(
{
......
@@ -8,7 +8,28 @@
$httpProvider.defaults.xsrfCookieName = 'csrftoken';
}]);
/*
- * Read Document
+ * DocumentHttpService: Read Document
+ * ===================
+ *
+ * route: annotations/documents/@d_id
+ * ------
+ *
+ * example:
+ * --------
+ * {
+ *   "id": 556,
+ *   "publication_date": "01/01/66",
+ *   "title": "Megalithic astronomy: Indications in standing stones",
+ *   "abstract_text": "An account is given of a number of surveys of
+ *                     stone circles, alignments, etc., found in Britain.
+ *                     The geometry of the rings is discussed in so far
+ *                     as it affects the determination of the azimuths
+ *                     to outliers and other circles.",
+ *   "full_text": null,
+ *   "journal": "Vistas in Astronomy",
+ *   "authors": "A. Thom"
+ * }
+ *
 */
http.factory('DocumentHttpService', function($resource) {
return $resource(
@@ -26,7 +47,28 @@
});
/*
- * Read all Ngrams
+ * NgramListHttpService: Read all Ngrams
+ * =====================
+ *
+ * route: annotations/corpora/@c_id/documents/@d_id
+ * ------
+ *
+ * json return format:
+ * -------------------
+ * corpus_id : {
+ *   lists: {(list_id:name)+}
+ *   doc_id : [ngrams_objects]+,
+ * }
+ *
+ * example:
+ * --------
+ * "554": {
+ *   "lists": { "558": "StopList", "564": "MiamList", "565": "MapList" }
+ *   "556": [{ "uuid": 2368, "occurrences": 1.0, "text": "idea", "list_id": 564 },
+ *           { "uuid": 5031, "occurrences": 1.0, "text": "indications", "list_id": 564},
+ *           { "uuid": 5015, "occurrences": 3.0, "text": "star", "list_id": 565 },
+ *           ... ],
+ * }
 */
http.factory('NgramListHttpService', function ($resource) {
return $resource(
@@ -45,7 +87,21 @@
});
/*
- * Create, modify or delete 1 Ngram
+ * NgramHttpService: Create, modify or delete 1 Ngram
+ * =================
+ *
+ * TODO REACTIVATE IN urls.py
+ *
+ * if new ngram:
+ *   -> ngram_id will be "create"
+ *   -> route: annotations/lists/@node_id/ngrams/create
+ *   -> will land on views.NgramCreate
+ *
+ * else:
+ *   -> ngram_id is a real ngram id
+ *   -> route: annotations/lists/@node_id/ngrams/@ngram_id
+ *   -> will land on views.NgramEdit
+ *
 */
http.factory('NgramHttpService', function ($resource) {
return $resource(
......
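A usage sketch of the three services documented above, purely illustrative (the `corpusId` parameter name and the literal IDs are assumptions taken from the example payloads, not guaranteed by the real API):
```
// Sketch only: assumes DocumentHttpService, NgramListHttpService and
// NgramHttpService have been injected into the current controller.
DocumentHttpService.get({ 'docId': 556 }, function (doc) {
  console.log(doc.title + ' -- ' + doc.journal);
});

NgramListHttpService.get({ 'corpusId': 554, 'docId': 556 }, function (data) {
  var lists  = data['554']['lists'];   // e.g. { "558": "StopList", "564": "MiamList", "565": "MapList" }
  var ngrams = data['554']['556'];     // flat array of ngram objects
  console.log(ngrams.length + ' ngrams across ' + Object.keys(lists).length + ' lists');
});

NgramHttpService.post(
  { 'listId': 565, 'ngramId': 'create' },   // 565 = MapList in the example above
  { 'text': 'standing stones' },
  function onSuccess()   { console.log('ngram created'); },
  function onError(data) { console.error('creation failed', data); });
```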
@@ -24,7 +24,13 @@
'docId': $rootScope.docId
},
function(data) {
+ // $rootScope.annotations
+ // ----------------------
+ // is the union of all lists, one being later "active"
+ // (then used for left-side flatlist AND inline annots)
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
+ // TODO £NEW : lookup obj[list_id][term_text] = {terminfo}
+ // $rootScope.lookup =
$rootScope.refreshDisplay();
},
function(data) {
@@ -87,7 +93,28 @@
var value = angular.element(inputEltId).val().trim();
if (value === "") return;
+ // £TEST locally check if already in annotations NodeNgrams ------
+ // $rootScope.annotations = array of ngram objects like:
+ //   {"list_id":805,"occurrences":2,"uuid":9386,"text":"petit échantillon"}
+ console.log('looking for "' + value + '" in list:' + listId)
+ var already_in_list = false ;
+ angular.forEach($rootScope.annotations, function(annot,i) {
+   // console.log(i + ' => ' + annot.text + ',' + annot.list_id) ;
+   if (value == annot.text && listId == annot.list_id) {
+     console.log('the term "' + value + '" was already present in list')
+     // no creation
+     already_in_list = true ;
+   }
+ }
+ );
+ if (already_in_list) { return ; }
+ // ---------------------------------------------------------------
+ // will check if there's a preexisting ngramId for this value
+ // TODO: if maplist => also add to miam
NgramHttpService.post(
{
'listId': listId,
@@ -97,6 +124,7 @@
'text': value
},
function(data) {
+ console.warn("refresh attempt");
// on success
if (data) {
angular.element(inputEltId).val("");
@@ -108,6 +136,11 @@
},
function(data) {
$rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
+ // TODO £NEW : lookup obj[list_id][term_text] = {terminfo}
+ // $rootScope.lookup =
$rootScope.refreshDisplay();
},
function(data) {
......
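The `TODO £NEW` notes above describe a lookup object keyed by list_id and then by term text. A hypothetical helper showing what that could look like, built from the flat annotations array (not part of the commit):
```
// Hypothetical helper for the "lookup obj[list_id][term_text]" TODO.
// Input: the flat array kept in $rootScope.annotations, e.g.
//   [{ "list_id": 805, "occurrences": 2, "uuid": 9386, "text": "petit échantillon" }, ...]
function buildLookup(annotations) {
  var lookup = {};
  annotations.forEach(function (annot) {
    if (!lookup[annot.list_id]) lookup[annot.list_id] = {};
    lookup[annot.list_id][annot.text] = annot;   // the terminfo object
  });
  return lookup;
}

// With such a lookup, the linear "already_in_list" scan above becomes a
// constant-time test:
//   var already_in_list = lookup[listId] && (value in lookup[listId]);
```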
@@ -55,10 +55,14 @@
</div>
</div>
</div>
- <div>
+ <div class="list-selector">
<h5>Select lists</h5>
<select class="selectpicker" multiple ng-change="activeListsChange()" ng-model="lists" ng-controller="ActiveListsController">
- <option ng-repeat="item in allListsSelect" id="list---{[{item.id}]}" ng-disabled="{[{ item.label == 'MAINLIST' }]}">{[{item.label}]}</option>
+ <option ng-repeat="item in allListsSelect" id="list---{[{item.id}]}">{[{item.label}]}</option>
+ <!-- to disallow unchecking MapList add this into <option> element: ng-disabled="{[{ item.label == 'MapList' }]}" -->
</select>
</div>
</div>
......
@@ -10,6 +10,10 @@ urlpatterns = [
# publication_date
# abstract_text,full_text
url(r'^documents/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
+ # GET:
+ # was : lists ∩ document (ngram_ids intersection if connected to list node_id and doc node_id)
+ # fixed 2016-01: just lists (because document doesn't get updated by POST create cf. ngram.lists.DocNgram filter commented)
url(r'^corpora/(?P<corpus_id>[0-9]+)/documents/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
+ # 2016-03-24: refactoring, deactivated NgramEdit and NgramCreate
......
@@ -34,33 +34,51 @@ def main(request, project_id, corpus_id, document_id):
}, context_instance=RequestContext(request))
class NgramList(APIView):
- """Read and Write Annotations"""
+ """Read the lists of ngrams (terms) that will become annotations"""
renderer_classes = (JSONRenderer,)
def get(self, request, corpus_id, doc_id):
    """Get All for a doc id"""
    corpus_id = int(corpus_id)
    doc_id = int(doc_id)
+   # our results: ngrams for the corpus_id (ignoring doc_id for the moment)
+   doc_ngram_list = []
    lists = {}
-   for list_type in ['MAINLIST']:
+   for list_type in ['MAINLIST', 'MAPLIST', 'STOPLIST']:
        corpus_nod = cache.Node[corpus_id]
        list_nod = corpus_nod.children(typename=list_type).first()
        list_id = list_nod.id
        lists["%s" % list_id] = list_type
-       # ngrams for the corpus_id (ignoring doc_id for the moment):
-       doc_ngram_list = [(obj.id, obj.terms, w) for (w,obj) in list_nod.ngrams.all()]
+       # add to results
+       doc_ngram_list += [(obj.id, obj.terms, w, list_id) for (w,obj) in list_nod.ngrams.all()]
+   print("annotations.views.NgramList.doc_ngram_list: ", doc_ngram_list)
    data = { '%s' % corpus_id : {
-       '%s' % doc_id : [
-           {
-               'uuid': ngram_id,
-               'text': ngram_text,
-               'occurrences': ngram_occurrences,
-               'list_id': list_id,
-           }
-           for ngram_id, ngram_text, ngram_occurrences in doc_ngram_list],
+       '%s' % doc_id :
+           [
+               {'uuid': ngram_id,
+                'text': ngram_text,
+                'occurrences': ngram_occurrences,
+                'list_id': list_id,}
+               for (ngram_id,ngram_text,ngram_occurrences,list_id) in doc_ngram_list
+           ],
        'lists': lists
    }}
+   # alternative transmission format for the "annotations", grouped by list then ngram_id
+   # { 'corpus_id' : {
+   #     list_id_stop: {term_stop1: {term_data}, term_stop2: {term_data}..},
+   #     list_id_miam: {term_miam1: {term_data}, term_miam2: {term_data}..},
+   #     list_id_map: {term_map1: {term_data}, term_map2: {term_data}..},
+   #   }
+   #   'lists' : {"list_id" : "list_type" ... }
+   # }
+   # NB 3rd possibility: the uniqueness of ngram_text could also allow us to use it
+   #    as key and could enhance lookup later (frequent checks if term exists)
    return Response(data)
......
@@ -85,19 +85,19 @@ INDEXED_HYPERDATA = {
, 'convert_from_db': str
},
- 'text':
-     { 'id' : 7
-     , 'type' : str
-     , 'convert_to_db' : str
-     , 'convert_from_db': str
-     },
-
- 'page':
-     { 'id' : 8
-     , 'type' : int
-     , 'convert_to_db' : int
-     , 'convert_from_db': int
-     },
+ # 'text':
+ #     { 'id' : 7
+ #     , 'type' : str
+ #     , 'convert_to_db' : str
+ #     , 'convert_from_db': str
+ #     },
+ #
+ # 'page':
+ #     { 'id' : 8
+ #     , 'type' : int
+ #     , 'convert_to_db' : int
+ #     , 'convert_from_db': int
+ #     },
}
@@ -121,38 +121,47 @@ from gargantext.util.parsers import \
EuropressParser, RISParser, PubmedParser, ISIParser, CSVParser, ISTexParser
RESOURCETYPES = [
+ # type 0
{ 'name': 'Europress (English)',
  'parser': EuropressParser,
  'default_language': 'en',
},
+ # type 1
{ 'name': 'Europress (French)',
  'parser': EuropressParser,
  'default_language': 'fr',
},
+ # type 2
{ 'name': 'Jstor (RIS format)',
  'parser': RISParser,
  'default_language': 'en',
},
+ # type 3
{ 'name': 'Pubmed (XML format)',
  'parser': PubmedParser,
  'default_language': 'en',
},
+ # type 4
{ 'name': 'Scopus (RIS format)',
  'parser': RISParser,
  'default_language': 'en',
},
+ # type 5
{ 'name': 'Web of Science (ISI format)',
  'parser': ISIParser,
  'default_language': 'fr',
},
+ # type 6
{ 'name': 'Zotero (RIS format)',
  'parser': RISParser,
  'default_language': 'en',
},
+ # type 7
{ 'name': 'CSV',
  'parser': CSVParser,
  'default_language': 'en',
},
+ # type 8
{ 'name': 'ISTex',
  'parser': ISTexParser,
  'default_language': 'en',
@@ -165,7 +174,7 @@ DEFAULT_TFIDF_CUTOFF_RATIO = .45 # MAINLIST maximum terms in %
DEFAULT_TFIDF_HARD_LIMIT = 750 # MAINLIST maximum terms abs
# (makes COOCS larger ~ O(N²) /!\)
- DEFAULT_COOC_THRESHOLD = 3 # inclusive minimum for COOCS coefs
+ DEFAULT_COOC_THRESHOLD = 2 # inclusive minimum for COOCS coefs
# (makes COOCS more sparse)
DEFAULT_MAPLIST_MAX = 300 # MAPLIST maximum terms
@@ -206,4 +215,3 @@ BATCH_NGRAMSEXTRACTION_SIZE = 1024
# Scrapers config
QUERY_SIZE_N_MAX = 1000
- QUERY_SIZE_N_DEFAULT = 1000
@@ -18,11 +18,11 @@ class CSVParser(Parser):
return Freqs
- def parse(self, filename):
+ def parse(self, filebuf):
    print("CSV: parsing (assuming UTF-8 and LF line endings)")
-   contents = filename.read().decode("UTF-8").split("\n")
+   contents = filebuf.read().decode("UTF-8").split("\n")
    sample_size = 10
    sample_contents = contents[0:sample_size]
......
- from lxml import etree
from ._Parser import Parser
from datetime import datetime
from io import BytesIO
@@ -6,10 +5,10 @@ import json
class ISTexParser(Parser):
- def parse(self, thefile):
+ def parse(self, filebuf):
-     json_data=open(thefile,"r")
-     data = json.load(json_data)
-     json_data.close()
+     contents = filebuf.read().decode("UTF-8")
+     data = json.loads(contents)
+     filebuf.close()
      json_docs = data["hits"]
      hyperdata_list = []
      hyperdata_path = {
......
@@ -145,5 +145,6 @@ class Parser:
try:
    file.seek(0)
except:pass
+ # debug: print(self.parse) # do we have correct parser ?
for hyperdata in self.parse(file):
    yield self.format_hyperdata(hyperdata)
@@ -3,6 +3,8 @@ For initial ngram groups via stemming
Example:
- groups['copper engrav'] = {'copper engraving':3, 'coppers engraver':1...}
- groups['post'] = {'poste':3, 'poster':5, 'postés':2...}
+ TODO use groups for aggregated occurrences/coocs counts !
"""
from gargantext.models import Node, NodeNgramNgram
@@ -25,8 +27,9 @@ def prepare_stemmers(corpus):
'__unknown__' : SnowballStemmer("english")
}
for lgiso2 in corpus.hyperdata['languages'].keys():
-     lgname = languages[lgiso2].name.lower()
-     stemmers_by_lg[lgiso2] = SnowballStemmer(lgname)
+     if (lgiso2 != '__skipped__'):
+         lgname = languages[lgiso2].name.lower()
+         stemmers_by_lg[lgiso2] = SnowballStemmer(lgname)
return stemmers_by_lg
def compute_groups(corpus, stoplist_id = None, overwrite_id = None):
......
@@ -45,15 +45,27 @@ def extract_ngrams(corpus, keys=('title', 'abstract', )):
ngrams_data = set()
# extract ngrams
resource_type_index = corpus.resources()[0]['type']
resource_type = RESOURCETYPES[resource_type_index]
default_language_iso2 = resource_type['default_language']
for documents_count, document in enumerate(corpus.children('DOCUMENT')):
    # get ngrams extractor for the current document
    language_iso2 = document.hyperdata.get('language_iso2', default_language_iso2)
    try:
+       # this looks for a parser in constants.LANGUAGES
        ngramsextractor = ngramsextractors[language_iso2]
    except KeyError:
-       print('Unrecognized language: `%s`' % (language_iso2, ))
+       # skip document
+       print('Unsupported language: `%s`' % (language_iso2, ))
+       # and remember that for later processes (eg stemming)
+       document.hyperdata['__skipped__'] = 'ngrams_extraction'
+       document.save_hyperdata()
+       session.commit()
+       if language_iso2 in corpus.hyperdata['languages']:
+           skipped_lg_infos = corpus.hyperdata['languages'].pop(language_iso2)
+           corpus.hyperdata['languages']['__skipped__'][language_iso2] = skipped_lg_infos
+           corpus.save_hyperdata()
+           session.commit()
        continue
    # extract ngrams on each of the considered keys
    for key in keys:
......
@@ -49,8 +49,10 @@ def parse(corpus):
documents_count += 1
# update info about the resource
resource['extracted'] = True
- # add a corpus-level info about languages
+ # add a corpus-level info about languages...
corpus.hyperdata['languages'] = observed_languages
+ # ...with a special key inside for skipped languages at ngrams_extraction
+ corpus.hyperdata['languages']['__skipped__'] = {}
# commit all changes
corpus.status('parsing', progress=documents_count, complete=True)
corpus.save_hyperdata()
......
@@ -148,6 +148,7 @@ class GroupChange(APIView):
=> removes couples where newly reconnected ngrams were involved
=> adds new couples from GroupsBuffer of terms view
+ TODO recalculate scores after new groups
TODO see use of util.lists.Translations
TODO benchmark selective delete compared to entire list rewrite
"""
@@ -155,10 +156,6 @@ class GroupChange(APIView):
all_nodes_involved = []
links = []
- print([i for i in request.POST.lists()])
- pass
for (mainform_key, subforms_ids) in request.POST.lists():
    mainform_id = mainform_key[:-2] # remove brackets '543[]' -> '543'
    all_nodes_involved.append(mainform_id)
@@ -414,10 +411,19 @@ class ListFamily(APIView):
links = Translations(groups_id)
linkinfo = links.groups
- # the output form
- for ng in mainlist_query.all() + hidden_ngrams_query.all():
+ # list of ngrams which need detailed info
+ ngrams_which_need_detailed_info = []
+ if "head" in parameters:
+     # head triggered simplified form: just the top of the mainlist
+     # TODO add maplist membership
+     ngrams_which_need_detailed_info = mainlist_query.all()
+ else:
+     ngrams_which_need_detailed_info = mainlist_query.all() + hidden_ngrams_query.all()
+ # the output form of details is:
+ #   ngraminfo[id] => [term, weight]
+ for ng in ngrams_which_need_detailed_info:
      ng_id = ng[0]
-     # id => [term, weight]
      ngraminfo[ng_id] = ng[1:]
# NB the client js will sort mainlist ngs from hidden ngs after ajax
......
@@ -7,15 +7,18 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
, url(r'^nodes/(\d+)$' , nodes.NodeResource.as_view() )
, url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view() )
, url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view() )
# get a list of ngram_ids or ngram_infos by list_id
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
+ , url(r'^ngramlists/change$', ngramlists.ListChange.as_view() )
+ # add or remove ngram from a list
+ # ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
+ #     rm <=> DEL ngramlists/change?list=42&ngrams=1,2
, url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view())
# modify grouping couples of a group node
# ex: POST ngramlists/groups?node=43
# post data looks like : {"767":[209,640],"779":[436,265,385]}"
, url(r'^ngramlists/family$' , ngramlists.ListFamily.as_view())
# entire combination of lists from a corpus
@@ -24,5 +27,4 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
# - an optional stoplist
# - an optional maplist
# - an optional grouplist
]
@@ -2,6 +2,7 @@
from time import sleep
import datetime
import threading
+ from traceback import print_tb
#from gargantext.settings import MEDIA_ROOT, BASE_DIR
from django.shortcuts import redirect
@@ -111,7 +112,7 @@ def save(request , project_id):
user_id = request.user.id,
parent_id = project_id,
typename = 'CORPUS',
- hyperdata = { "action" : "Scraping data"
+ hyperdata = { "action" : "Scrapping data"
, "language_id" : None
}
)
@@ -137,9 +138,9 @@ def save(request , project_id):
for filename in tasks.firstResults:
    if filename!=False:
        # add the uploaded resource to the corpus
-       # add the uploaded resource to the corpus
-       corpus.add_resource( type = 3
+       corpus.add_resource(
+           type = 8 # cf. constants.RESOURCETYPES
            , path = filename
        )
        dwnldsOK+=1
@@ -152,14 +153,17 @@ def save(request , project_id):
except Exception as error:
    print('WORKFLOW ERROR')
    print(error)
+   try:
+       print_tb(error.__traceback__)
+   except:
+       pass
+   # IMPORTANT ---------------------------------
+   # sanitize session after interrupted transact
+   session.rollback()
+   # --------------------------------------------
    sleep(1)
return HttpResponseRedirect('/projects/' + str(project_id))
data = [query_string,query,N]
return JsonHttpResponse(data)
@@ -12,6 +12,7 @@ import json
import datetime
from os import path
import threading
+ from traceback import print_tb
#from gargantext.settings import MEDIA_ROOT, BASE_DIR
from django.shortcuts import redirect
@@ -159,10 +160,16 @@ def save( request , project_id ) :
except Exception as error:
    print('WORKFLOW ERROR')
    print(error)
+   try:
+       print_tb(error.__traceback__)
+   except:
+       pass
+   # IMPORTANT ---------------------------------
+   # sanitize session after interrupted transact
+   session.rollback()
+   # --------------------------------------------
    sleep(1)
return HttpResponseRedirect('/projects/' + str(project_id))
data = alist
return JsonHttpResponse(data)
@@ -22,6 +22,19 @@ th a {
font-size: 0.7em;
}
+ #dynatable-query-search-my-ajax-table {
+   min-width: 20em;
+ }
+ .dynatable-search {
+   margin-left: 2em;
+   font-size: 16px;
+ }
+ .dynatable-per-page-label {
+   margin-left: 2em;
+   font-size: 16px;
+ }
#corpusdisplayer {
width:200px;
margin:0 auto;
@@ -60,8 +73,8 @@ p.note > label {
float: left;
}
- .note.greyed {
-   opacity: 0.2;
+ .greyed {
+   opacity: 0.3;
}
tr:hover {
@@ -77,9 +90,11 @@ tr:hover {
.delete {
color:red;
opacity: 0.8;
+ text-decoration: line-through;
}
.keep {
color:green;
+ font-style: italic;
}
.group {
......
@@ -591,6 +591,7 @@ function getTopPapers(type){
if(pub["title"]) {
var gquery = "https://searx.laquadrature.net/?categories=general&q="+pub["title"].replace(" "+"+")
+ // ex url_elems = ["http:", "", "localhost:8000", "projects", "1", "corpora", "2690", "explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5"]
var url_elems = window.location.href.split("/")
var url_mainIDs = {}
for(var i=0; i<url_elems.length; i++) {
@@ -598,7 +599,10 @@ function getTopPapers(type){
url_mainIDs[url_elems[i-1]] = Number(url_elems[i]);
}
}
- var getpubAPI = window.location.origin+'/project/'+url_mainIDs["project"]+'/corpus/'+ url_mainIDs["corpus"] + '/document/'+pub["id"]
+ // ex url_mainIDs = {projects: 1, corpora: 2690}
+ // link to matching document
+ var getpubAPI = window.location.origin+'/projects/'+url_mainIDs["projects"]+'/corpora/'+ url_mainIDs["corpora"] + '/documents/'+pub["id"]
var ifjournal="",ifauthors="",ifkeywords="",ifdate="",iftitle="";
......
@@ -33,15 +33,9 @@ var latest,oldest;
var TheBuffer = false
function Push2Buffer( NewVal ) {
- console.log( " = = = = = = = = " )
- console.log( "Push2Buffer()" )
- console.log( "\t"+NewVal )
if ( TheBuffer == false) {
if( ! NewVal ) {
- // var limits = [ new Date( oldest[0],oldest[1],oldest[2] ) , new Date( latest[0],latest[1],latest[2] ) ];
- var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
- limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
- limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
+ var limits = [ oldest , latest ];
NewVal = limits;
}
console.log( " - - - - - - " )
@@ -57,9 +51,7 @@ function Push2Buffer( NewVal ) {
var past = TheBuffer[0]+"_"+TheBuffer[1]
if( ! NewVal ) {
- var limits = [new Date(oldest[0],oldest[1]-1,oldest[2]), new Date(latest[0],latest[1]-1,latest[2] ) ];
- limits[0] = new Date(limits[0].setDate(limits[0].getDate()-1) );
- limits[1] = new Date(limits[1].setDate(limits[1].getDate()+1) );
+ var limits = [ oldest , latest ];
NewVal = limits;
}
var now = NewVal[0]+"_"+NewVal[1]
@@ -101,18 +93,19 @@ function Final_UpdateTable( action ) {
var dataini = TheBuffer[0];
var datafin = TheBuffer[1];
pr("show me the pubs of the selected period")
- console.log( TheBuffer )
+ // console.log( TheBuffer )
pr("\tfrom ["+dataini+"] to ["+datafin+"]")
TimeRange = []
- console.log(dataini, datafin)
+ // console.log("dataini, datafin")
+ // console.log(dataini, datafin)
$.each(AjaxRecords, function(i, node) {
if (node.date >= dataini && node.date >= dataini) {
// pr( AjaxRecords[i].date+" : "+AjaxRecords[i].id )
TimeRange.push(node);
}
});
- console.log(TimeRange)
+ // console.log(TimeRange)
MyTable = $('#my-ajax-table').dynatable({
dataset: {
@@ -307,6 +300,7 @@ function Main_test( Data , SearchFilter ) {
var t1 = get_node_date(AjaxRecords.slice(-1)[0]);
oldest = t0;
latest = t1;
+ console.log('t0, t1')
console.log(t0, t1)
TheBuffer = [t0, t1];
......
@@ -58,10 +58,25 @@
<p id="corpusdisplayer" onclick='Final_UpdateTable("click")' class="btn btn-primary btn-lg">
Close term list
</p>
+ <!-- <button title='run test function' onclick="doATest()">
+ TEST
+ </button> -->
</a>
+ <!-- see in javascript function queries.functions['my_state_filter'] -->
+ <div class="pull-left" style="margin-top:1.85em;">
+ Filter:
+ <select id="picklistmenu" name="my_state_filter">
+ <option value='reset' selected="selected" >All terms</option>
+ <option value='0'>Mainlist only</option>
+ <option value='1'>Maplist only</option>
+ <option value='2'>Stoplist only</option>
+ </select>
+ </div>
</h4>
</div>
<div id="terms_table" class="panel-collapse collapse in no-transition" role="tabpanel">
<div class="panel-body">
<div id="div-table">
@@ -78,9 +93,9 @@
</div> <!-- /div panel -->
</div> <!-- /row with the dynatable panels -->
</div>
<!-- /jumbotron
<button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">
Import a Corpus-List
</button>
@@ -90,6 +105,11 @@
+ <!--
+ # stub to import a list (aka orange button)
+ <button id="ImportList" onclick="GetUserPortfolio();" class="btn btn-warning">Import a Corpus-List</button>
+ -->
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/bootstrap/bootstrap.min.js" %}"></script>
......
@@ -32,7 +32,7 @@
</center>
</div>
</div>
<div class="row">
<div id="monthly-volume-chart"></div>
</div>
......