Commit 30b37b48 authored by PkSM3's avatar PkSM3

Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

parents 66318619 7c29e453
......@@ -4,3 +4,4 @@ parsing/Taggers/treetagger/
*.pyc
data_samples
VENV
local_settings.py
import sqlalchemy
from gargantext_web import api
from gargantext_web.db import *
from node import models
from sqlalchemy import create_engine
from sqlalchemy.sql import func
import numpy as np
import collections
# Short module-level aliases for the `.sa` attribute of each model in
# node.models (presumably the SQLAlchemy-mapped classes generated by
# aldjemy -- TODO confirm), so queries below can be written against them.
ResourceType = models.ResourceType.sa
Resource = models.Resource.sa
NodeType = models.NodeType.sa
NodeNgram = models.Node_Ngram.sa
NodeNodeNgram = models.NodeNodeNgram.sa
NodeNgramNgram = models.NodeNgramNgram.sa
Ngram = models.Ngram.sa
Node_Hyperdata = models.Node_Hyperdata.sa
Hyperdata = models.Hyperdata.sa
Node = models.Node.sa
Corpus = models.Corpus.sa
def get_session(database_url=None):
    """Create and return a new SQLAlchemy session for the application database.

    Args:
        database_url: optional SQLAlchemy database URL. When omitted, the
            ``GARGANTEXT_DATABASE_URL`` environment variable is used if it
            is set, otherwise the legacy built-in URL.

    Returns:
        A new session bound to a freshly created engine.

    Note:
        Every call builds a new engine; call this once and share the
        returned session rather than calling it repeatedly.
        ``use_native_hstore`` is passed to ``create_engine`` unchanged, so
        the URL is expected to target the PostgreSQL/psycopg2 dialect.
    """
    import os

    from sqlalchemy.orm import sessionmaker

    if database_url is None:
        database_url = os.environ.get(
            "GARGANTEXT_DATABASE_URL",
            # Legacy default kept so existing deployments keep working.
            # TODO: move these credentials out of the source tree.
            "postgresql+psycopg2://gargantua:C8kdcUrAQy66U@localhost/gargandb",
        )

    engine = create_engine(database_url, use_native_hstore=True)
    Session = sessionmaker(bind=engine)
    return Session()
# Module-level session, created once when this module is imported.
session = get_session()
def result2dict(query):
results = dict()
for result in query:
......@@ -42,7 +20,7 @@ def result2dict(query):
def diachronic_specificity(corpus_id, terms, order=True):
'''
'''
Take as parameter Corpus primary key and text of ngrams.
Result is a dictionnary.
Keys are period (years for now)
......@@ -63,24 +41,24 @@ def diachronic_specificity(corpus_id, terms, order=True):
.filter(Node.parent_id == corpus_id)
.group_by(Node.hyperdata['publication_year'])
)
document_filterByngram_year = dict(ngram_frequency_query.all())
document_all_year = dict(document_year_sum_query.all())
#print(document_all_year)
relative_terms_count = dict()
for year, total in document_all_year.items():
terms_count = document_filterByngram_year.get(year, 0)
relative_terms_count[year] = terms_count / total
mean = np.mean(list(relative_terms_count.values()))
relative_terms_count = {
key: (value - mean)
for key, value in relative_terms_count.items()
}
if order == True:
return collections.OrderedDict(sorted(relative_terms_count.items()))
else:
......
import sqlalchemy
from gargantext_web import api
from node import models
from sqlalchemy import create_engine
from sqlalchemy.sql import func
import numpy as np
import collections
from math import log
# Short module-level aliases for the `.sa` attribute of each model in
# node.models (presumably the SQLAlchemy-mapped classes generated by
# aldjemy -- TODO confirm).
NodeType = models.NodeType.sa
NodeNgram = models.Node_Ngram.sa
# Fixed: this alias used to point at models.NodeNgramNgram, i.e. a different
# model than its name says.
NodeNodeNgram = models.NodeNodeNgram.sa
Ngram = models.Ngram.sa
Node = models.Node.sa
Corpus = models.Corpus.sa
def get_session(database_url=None):
    """Create and return a new SQLAlchemy session for the application database.

    Args:
        database_url: optional SQLAlchemy database URL. When omitted, the
            ``GARGANTEXT_DATABASE_URL`` environment variable is used if it
            is set, otherwise the legacy built-in URL.

    Returns:
        A new session bound to a freshly created engine.

    Note:
        Every call builds a new engine; call this once and share the
        returned session rather than calling it repeatedly.
        ``use_native_hstore`` is passed to ``create_engine`` unchanged, so
        the URL is expected to target the PostgreSQL/psycopg2 dialect.
    """
    import os

    from sqlalchemy.orm import sessionmaker

    if database_url is None:
        database_url = os.environ.get(
            "GARGANTEXT_DATABASE_URL",
            # Legacy default kept so existing deployments keep working.
            # TODO: move these credentials out of the source tree.
            "postgresql+psycopg2://gargantua:C8kdcUrAQy66U@localhost/gargandb",
        )

    engine = create_engine(database_url, use_native_hstore=True)
    Session = sessionmaker(bind=engine)
    return Session()
# Module-level session, created once when this module is imported.
session = get_session()
# NodeType row named "Document", looked up once at import time (this runs a
# database query on import); tfidf() reads its id to count documents.
type_doc = session.query(NodeType).filter(NodeType.name == "Document").first()
def tfidf(corpus, document, ngram):
    '''
    Compute TF-IDF (Term Frequency - Inverse Document Frequency)
    See: http://en.wikipedia.org/wiki/Tf%E2%80%93idf

    corpus, document and ngram are objects exposing an ``id`` attribute.

    The value returned is TF * IDF where:
      TF  = weight of the (document, ngram) row
            / number of ngram rows attached to the document
      IDF = log(number of "Document" nodes whose parent is the corpus
                / number of rows linking the ngram to a node whose parent
                  is the corpus)

    Returns None when the value cannot be computed: the ngram is not
    attached to the document, one of the counts is zero, or a query fails
    (in that last case the error is printed, as before).
    '''
    try:
        association = (
            session.query(NodeNgram)
            .filter(NodeNgram.node_id == document.id)
            .filter(NodeNgram.ngram_id == ngram.id)
            .first()
        )
        if association is None:
            # The ngram does not occur in this document: nothing to weigh.
            return None
        occurrences_of_ngram = association.weight

        # NOTE(review): this counts the ngram rows of the document, whereas
        # the Django code this replaced summed their weights -- confirm
        # which denominator is intended.
        ngrams_by_document = (
            session.query(NodeNgram)
            .filter(NodeNgram.node_id == document.id)
            .count()
        )
        if ngrams_by_document == 0:
            return None
        term_frequency = occurrences_of_ngram / ngrams_by_document

        documents_in_corpus = (
            session.query(Node)
            .filter(Node.parent_id == corpus.id)
            .filter(Node.type_id == type_doc.id)
            .count()
        )
        documents_with_ngram = (
            session.query(NodeNgram)
            .join(Node, NodeNgram.node_id == Node.id)
            .filter(Node.parent_id == corpus.id)
            .filter(NodeNgram.ngram_id == ngram.id)
            .count()
        )
        if documents_in_corpus == 0 or documents_with_ngram == 0:
            # log(0) and x/0 are undefined; e.g. the document is not a
            # child of this corpus.
            return None
        inverse_document_frequency = log(documents_in_corpus / documents_with_ngram)

        # result = tf * idf
        return term_frequency * inverse_document_frequency
    except Exception as error:
        # Best effort, as before: report the problem and yield no score.
        print(error)
        return None
#corpus = session.query(Node).get(int(102750))
#ngram = session.query(Ngram).get(10885)
##ngram = session.query(Ngram).filter(Ngram.terms == "bayer").first()
#type_doc = session.query(NodeType).filter(NodeType.name == "Document").first()
#doc_id = session.query(NodeNgram.node, NodeNgram.node_id)\
# .join(Node, Node.id == NodeNgram.node_id)\
# .filter(NodeNgram.ngram == ngram)\
# .filter(Node.type_id == type_doc.id)\
# .first()
#document = session.query(Node).get(doc_id[1])
#
#result = tfidf(corpus,document, ngram)
#print(result)
#
{
"directory": "static/bower_components"
}
static/bower_components/
{
"globalstrict": true,
"globals": {
"angular": false,
"describe": false,
"it": false,
"expect": false,
"beforeEach": false,
"afterEach": false,
"module": false,
"inject": false
}
}
\ No newline at end of file
set tabstop=4
set shiftwidth=4
set expandtab
set softtabstop=4
# Gargantext Annotations web application
We also use a number of node.js tools to initialize and test. You must have node.js and
its package manager (npm) installed. You can get them from [http://nodejs.org/](http://nodejs.org/).
## Preview only
Activate your virtualenv and run a simple http server
```
workon gargantext
python3 -m http.server
```
or :
```
npm start
```
Now browse to the app at `http://localhost:8000/app/index.html`.
## Install development tools and dependencies
We have two kinds of dependencies in this project: tools and angular framework code. The tools help
us manage and test the application.
* We get the tools we depend upon via `npm`, the [node package manager][npm].
* We get the angular code via `bower`, a [client-side code package manager][bower].
We have preconfigured `npm` to automatically run `bower` so we can simply do:
```
npm install
```
Behind the scenes this will also call `bower install`. You should find that you have two new
folders in your project.
* `node_modules` - contains the npm packages for the tools we need
* `static/bower_components` - contains the angular framework files
*Note that the `bower_components` folder would normally be installed in the root folder but
this project changes this location through the `.bowerrc` file. Putting it in the `static` folder makes
it easier to serve the files with a web server.*
## Directory Layout
This will be adapted to fit the django API code as well. For now, the generic layout is :
```
app/ --> all of the source files for the application
app.css --> default stylesheet
components/ --> all app specific modules
view1/ --> the view1 view template and logic
view1.html --> the partial template
view1.js --> the controller logic
view1_test.js --> tests of the controller
app.js --> main application module
index.html --> app layout file (the main html template file of the app)
```
# Conception and workflow documentation
## TODO : à traduire en anglais
Cette API permet d'éditer les mots-clés miamlistés ou stoplistés associés à un document affiché dans un cadre d'une page web permettant de naviguer à travers un ensemble de documents d'un corpus.
### Architecture
- Templates : Django et Angular.js ?
- Communication entre les modules : évènements Angular ($emit et $broadcast)
- Pas de routage entre différentes URL, car ici une seule vue principale basée sur le template django corpus.html
- Modèle d'abstraction de données : côté client (Angular Scopes) et côté serveur (Django Model et SQLAlchemy)
- Composants : TODO lister et décrire les composants client et serveur
- Structure de l'application : organisation du client et du serveur
- Style : Bootstrap et un thème spécifique choisi pour Gargantext
- Gestion des dépendances :
- bower, npm pour le développement web et les tests côté client
- pip requirements pour le côté serveur
## Quelles actions exécute l'API ?
- afficher le titre, les auteurs, le résumé, la date de publication et le corps d'un document.
- lecture des mots-clés miamlistés associés à un document (dans le texte et hors du texte).
- lecture des mots-clés stoplistés associés à un document (dans le texte et hors du texte).
- lecture des documents ayant le plus de mots-clés miamlistés associés identiques pour afficher une liste de liens vers de nouveaux documents
- lecture du groupe de mots-clés auquel appartient un mot-clé (synonymes, différentes formes)
- modification du groupe de mots-clés auquel appartient un mot-clé donné
On désigne par mot-clé un NGram.
## Schéma de l'API
Liste des endpoints
### Lecture des données
- POST '^api/nodes/(\d+)/children/queries$' : liste des NGrams d'un document avec la possibilité de filtrer par NGrams
- GET '^api/nodes$' : liste des identifiants de mots-clés filtrés par type (NGram ou autre) pour un identifiant de parent (Document ou autre)
- GET '^api/nodes/(\d+)/ngrams$': liste des termes des mots-clés associés à un Document parent, filtrés par termes
- GET '^api/nodes/(\d+)/children/metadata$' : liste des metadata d'un Node, c'est-à-dire :
- pour un document : titre, auteur, etc
- pour un NGram : stoplisté ou miamlisté ?
### Écriture des données
TODO
## Workflow
Nous nous fixons sur cette documentation et spécification de l'API
- en parallèle : développement de l'API et prototypage de l'interface
- le prototypage de l'interface peut modifier l'API si besoin
### Spécifications des fondations de l'interface
- résolutions d'écran
- browsers
- langue: english only
- SEO: aucun ?
- collaboratif : oui, les modifications d'un autre utilisateur seront notifiées à tous les utilisateurs présents sur le même corpus de documents
- fonctionne offline ?
### Working process
- the follow-up board is updated regularly [on Trello](https://trello.com/b/96ItkDBS/gargantext-miamlists-and-stoplists)
- calendrier prévisionnel: TODO
- interactions entre les acteurs: emails
- git, branches : branche "elias", `git pull --rebase origin master` réguliers
- prévision des revues de code et de l'interface : TODO
### Plateforme
- Python 3.4
- Django 1.6
- Postgresql 9.3 + HSTORE
- SQLAlchemy
- Bootstrap CSS
- Angular.js
### Outils de qualité de code
- pylint
- jshint (voir .jshintrc)
- indentations : 4 espaces (voir .lvimrc)
- nettoyage automatique des espaces en fin de ligne
## Tests
There are two kinds of tests possible : Unit tests and End to End tests.
- côté client : étudier karma.js et protractor
- définir la stratégie de tests : TODO
## Déploiement
- définir le processus de déploiement
- prévoir un système de monitoring des erreurs du serveur une fois en ligne
- Sentry ?
## Updating the web application
Previously we recommended that you merge in changes to angular-seed into your own fork of the project.
Now that the angular framework library code and tools are acquired through package managers (npm and
bower) you can use these tools instead to update the dependencies.
You can update the tool dependencies by running:
```
npm update
```
This will find the latest versions that match the version ranges specified in the `package.json` file.
You can update the Angular dependencies by running:
```
bower update
```
This will find the latest versions that match the version ranges specified in the `bower.json` file.
### Running the App in Production
This really depends on how complex your app is and the overall infrastructure of your system, but
the general rule is that all you need in production are all the files under the `app/` directory.
Everything else should be omitted.
Angular apps are really just a bunch of static html, css and js files that just need to be hosted
somewhere they can be accessed by browsers.
If your Angular app is talking to the backend server via xhr or other means, you need to figure
out what is the best way to host the static files to comply with the same origin policy if
applicable. Usually this is done by hosting the files by the backend server or through
reverse-proxying the backend server(s) and webserver(s).
##
[AngularJS]: http://angularjs.org/
[git]: http://git-scm.com/
[bower]: http://bower.io
[npm]: https://www.npmjs.org/
[node]: http://nodejs.org
[protractor]: https://github.com/angular/protractor
[jasmine]: http://jasmine.github.io
[karma]: http://karma-runner.github.io
[http-server]: https://github.com/nodeapps/http-server
{
"name": "annotations",
"description": "Annotations for Gargantext",
"version": "0.0.1",
"license": "GPLv3",
"private": true,
"dependencies": {
"angular": "~1.2.x",
"angular-loader": "~1.2.x",
"angular-resource": "~1.2.x",
"bootstrap": "~3.x"
},
"resolutions": {
"angular": "~1.2.x"
}
}
{
"name": "gargantext-annotations",
"private": true,
"version": "0.0.1",
"description": "Annotations for gargantext",
"license": "GPLv3",
"devDependencies": {
"karma": "~0.10",
"protractor": "^1.1.1",
"http-server": "^0.6.1",
"bower": "^1.3.1",
"shelljs": "^0.2.6",
"karma-junit-reporter": "^0.2.2"
},
"scripts": {
"postinstall": "bower install",
"prestart": "npm install",
"start": "http-server -a localhost -p 8000 -c-1",
"pretest": "npm install",
"test": "karma start karma.conf.js",
"test-single-run": "karma start karma.conf.js --single-run",
"preupdate-webdriver": "npm install",
"update-webdriver": "webdriver-manager update",
"preprotractor": "npm run update-webdriver",
"protractor": "protractor e2e-tests/protractor.conf.js",
"update-index-async": "node -e \"require('shelljs/global'); sed('-i', /\\/\\/@@NG_LOADER_START@@[\\s\\S]*\\/\\/@@NG_LOADER_END@@/, '//@@NG_LOADER_START@@\\n' + sed(/sourceMappingURL=angular-loader.min.js.map/,'sourceMappingURL=bower_components/angular-loader/angular-loader.min.js.map','static/bower_components/angular-loader/angular-loader.min.js') + '\\n//@@NG_LOADER_END@@', 'templates/annotations/demo.html');\""
}
}
/* app css stylesheet */
.delete-keyword, .occurrences {
vertical-align: super;
font-size: 70%;
}
.delete-keyword {
cursor: pointer;
}
.center-block {
display: block;
margin-left: auto;
margin-right: auto;
}
.keyword-inline {
display: inline;
}
.keyword-inline:hover {
text-decoration: none;
}
.nav-tabs {
border-bottom: none;
}
.miamword {
color: black;
background-color: rgba(60, 118, 61, 0.5);
cursor: pointer;
}
.stopword {
color: black;
background-color: rgba(169, 68, 66, 0.2);
cursor: pointer;
}
.global-stopword {
color: black;
background-color: rgba(169, 68, 66, 0.05);
cursor: pointer;
}
.main-panel, .text-panel, .words-panel {
height: 400px;
margin: 10px 0px;
}
.text-panel {
overflow-y: auto;
}
.words-list {
margin-bottom: 5px;
height: 250px;
}
.keyword-container {
max-width: 200px;
}
.keyword {
word-break: break-all;
}
.list-group-item {
display: inline-block;
float: left;
padding: 5px;
}
.words-pagination {
margin: 5px 0;
}
.text-panel p, .text-panel h3 {
-webkit-transition: all 0.25s linear;
-moz-transition: all 0.25s linear;
-ms-transition: all 0.25s linear;
-o-transition: all 0.25s linear;
transition: all 0.25s linear;
}
.selection {
color: #aaa;
}
::selection {
color: black;
background-color: rgba(0, 0, 0, 0.4);
}
.noselection {
-webkit-touch-callout: none;
-webkit-user-select: none;
-khtml-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
.selection-menu {
display: none;
position: absolute;
color: #394141;
background: white;
font-size: 0.8em;
font-weight: 600;
-webkit-box-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5);
-moz-box-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5);
box-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5);
}
/*.selection-menu:before {
content: '';
position: absolute;
left: -10px;
top: 0px;
border-right: solid white 10px;
border-top: solid transparent 8px;
border-bottom: solid transparent 8px;
}*/
.selection-menu ul {
list-style: none;
margin: 0;
padding: 0;
}
.selection-menu li {
border-bottom: solid thin #CCC;
padding: 10px;
white-space: nowrap;
}
.selection-menu [class*="glyphicon"] {
min-width: 25px;
display: inline-block;
text-align: center;
border-right: solid thin #CCC;
margin-right: 5px;
}
This diff is collapsed.
(function () {
'use strict';
var http = angular.module('annotationsAppHttp', ['ngResource']);
/*
 * DocumentHttpService: GET a single document by its id.
 */
http.factory('DocumentHttpService', function ($resource) {
    // The API root is read when the service is instantiated, not when this file loads.
    var documentUrl = window.ANNOTATION_API_URL + 'document/:docId/';
    var defaultParams = {docId: '@docId'};
    var actions = {
        get: {method: 'GET', params: {docId: '@docId'}}
    };
    return $resource(documentUrl, defaultParams, actions);
});
/*
 * NgramListHttpService: GET one ngram list by its id.
 */
http.factory('NgramListHttpService', function ($resource) {
    // The API root is read when the service is instantiated, not when this file loads.
    var listUrl = window.ANNOTATION_API_URL + 'lists/:listId/';
    var defaultParams = {listId: '@listId'};
    var actions = {
        get: {method: 'GET', params: {listId: '@listId'}}
    };
    return $resource(listUrl, defaultParams, actions);
});
/*
 * NgramHttpService: POST or DELETE one ngram of a list.
 */
http.factory('NgramHttpService', function ($resource) {
    // The API root is read when the service is instantiated, not when this file loads.
    var ngramUrl = window.ANNOTATION_API_URL + 'lists/:listId/ngrams/:ngramId/';
    var defaultParams = {listId: '@listId'};
    // Both actions address the ngram through the same pair of URL parameters.
    var actions = {
        'post': {
            method: 'POST',
            params: {listId: '@listId', ngramId: '@ngramId'}
        },
        'delete': {
            method: 'DELETE',
            params: {listId: '@listId', ngramId: '@ngramId'}
        }
    };
    return $resource(ngramUrl, defaultParams, actions);
});
// return {
// newAnnotationObject: function(text, category, level) {
// return {
// 'text': text.trim(),
// 'category': category,
// 'level': level
// };
// },
// create: function(keyword, $rootScope) {
// if ($rootScope.annotations === undefined) $rootScope.annotations = [];
// // find duplicate by text
// var existing = _.find(
// $rootScope.annotations,
// function(annotation) { return annotation.text.trim().toLowerCase() === keyword.text.trim().toLowerCase(); }
// );
// // delete existing conflicting data before adding new
// if (existing) {
// if (existing.category == keyword.category && existing.level == keyword.level) return;
// this.delete(existing, $rootScope);
// }
// // TODO remove server mocking
// var mock = _.extend(keyword, {
// 'uuid': jQuery.now().toString(),
// 'occurrences': 322
// });
//
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations.push(mock);
// });
// });
//
// return mock;
// },
// delete: function(keyword, $rootScope) {
// var filtered = _.filter($rootScope.annotations, function(item) {
// if (item.uuid == keyword.uuid) {
// return false;
// } else {
// return true;
// }
// });
// $timeout(function() {
// $rootScope.$apply(function() {
// $rootScope.annotations = filtered;
// });
// });
// }
// };
})(window);
<span ng-if="keyword.category == 'miamlist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="miamlist">×</span>
<a ng-if="keyword.category == 'miamlist'" href="#" data-toggle="tooltip" class="keyword miamword">{{keyword.text}}</a>
<span ng-if="keyword.category == 'stoplist'" ng-click='onDeleteClick()' class="delete-keyword" data-keyword-id="{{keyword.uuid}}" data-keyword-text="{{keyword.text}}" data-keyword-category="stoplist">×</span>
<a ng-if="keyword.category == 'stoplist'" href="#" data-toggle="tooltip" class="keyword stopword">{{keyword.text}}</a>
<span class="occurrences" data-keyword-id="{{keyword.uuid}}">{{keyword.occurrences}}</span>
// include angular loader, which allows the files to load in any order
//@@NG_LOADER_START@@
// You need to run `npm run update-index-async` to inject the angular async code here
//@@NG_LOADER_END@@
// include a third-party async loader library
/*!
* $script.js v1.3
* https://github.com/ded/script.js
* Copyright: @ded & @fat - Dustin Diaz, Jacob Thornton 2011
* Follow our software http://twitter.com/dedfat
* License: MIT
*/
!function(a,b,c){function t(a,c){var e=b.createElement("script"),f=j;e.onload=e.onerror=e[o]=function(){e[m]&&!/^c|loade/.test(e[m])||f||(e.onload=e[o]=null,f=1,c())},e.async=1,e.src=a,d.insertBefore(e,d.firstChild)}function q(a,b){p(a,function(a){return!b(a)})}var d=b.getElementsByTagName("head")[0],e={},f={},g={},h={},i="string",j=!1,k="push",l="DOMContentLoaded",m="readyState",n="addEventListener",o="onreadystatechange",p=function(a,b){for(var c=0,d=a.length;c<d;++c)if(!b(a[c]))return j;return 1};!b[m]&&b[n]&&(b[n](l,function r(){b.removeEventListener(l,r,j),b[m]="complete"},j),b[m]="loading");var s=function(a,b,d){function o(){if(!--m){e[l]=1,j&&j();for(var a in g)p(a.split("|"),n)&&!q(g[a],n)&&(g[a]=[])}}function n(a){return a.call?a():e[a]}a=a[k]?a:[a];var i=b&&b.call,j=i?b:d,l=i?a.join(""):b,m=a.length;c(function(){q(a,function(a){h[a]?(l&&(f[l]=1),o()):(h[a]=1,l&&(f[l]=1),t(s.path?s.path+a+".js":a,o))})},0);return s};s.get=t,s.ready=function(a,b,c){a=a[k]?a:[a];var d=[];!q(a,function(a){e[a]||d[k](a)})&&p(a,function(a){return e[a]})?b():!function(a){g[a]=g[a]||[],g[a][k](b),c&&c(d)}(a.join("|"));return s};var u=a.$script;s.noConflict=function(){a.$script=u;return this},typeof module!="undefined"&&module.exports?module.exports=s:a.$script=s}(this,document,setTimeout)
// Load all of the dependencies asynchronously, in three stages:
//   1. the core libraries (angular, bootstrap, angular-loader, underscore),
//   2. angular-resource, requested once stage 1 has loaded,
//   3. the application files, requested once stage 2 has loaded.
// All paths are prefixed with window.STATIC_URL, set by the hosting template.
var S = window.STATIC_URL;
$script([
    S + 'bower_components/angular/angular.min.js',
    S + 'bower_components/bootstrap/dist/js/bootstrap.min.js',
    S + 'bower_components/angular-loader/angular-loader.min.js',
    // No trailing comma after the last entry: ES3 engines (IE <= 8) would
    // count an extra undefined element and try to load it as a script.
    S + 'bower_components/underscore/underscore-1.5.2.js'
    //'bower_components/angular-route/angular-route.js',
], function () {
    $script([
        S + 'bower_components/angular-resource/angular-resource.min.js'
    ], function () {
        $script([S + 'annotations/http.js', S + 'annotations/app.js'], function () {
            // when all is done, execute bootstrap angular application (replace ng-app directive)
            angular.bootstrap(document.getElementById("annotationsApp"), ['annotationsApp']);
        });
    });
});
<ul class="noselection">
<li>{{level}}<span ng-if="category !== null"> {{category}}</span></li>
<li class="miamword" ng-if="local_miamlist === true" ng-click="onClick($event, 'post', 'miamlist', 'local')">add to miam-list</li>
<li class="miamword" ng-if="local_miamlist === false" ng-click="onClick($event, 'delete', 'miamlist', 'local')">remove from miam-list</li>
<li class="stopword" ng-if="local_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'local')">add to local stop-list</li>
<li class="stopword" ng-if="local_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'local')">remove from local stop-list</li>
<li class="stopword" ng-if="global_stoplist === true" ng-click="onClick($event, 'post', 'stoplist', 'global')">add to global stop-list</li>
<li class="stopword" ng-if="global_stoplist === false" ng-click="onClick($event, 'delete', 'stoplist', 'global')">remove from global stop-list</li>
</ul>
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(){'use strict';function d(a){return function(){var c=arguments[0],b,c="["+(a?a+":":"")+c+"] http://errors.angularjs.org/1.2.28/"+(a?a+"/":"")+c;for(b=1;b<arguments.length;b++)c=c+(1==b?"?":"&")+"p"+(b-1)+"="+encodeURIComponent("function"==typeof arguments[b]?arguments[b].toString().replace(/ \{[\s\S]*$/,""):"undefined"==typeof arguments[b]?"undefined":"string"!=typeof arguments[b]?JSON.stringify(arguments[b]):arguments[b]);return Error(c)}}(function(a){var c=d("$injector"),b=d("ng");a=a.angular||
(a.angular={});a.$$minErr=a.$$minErr||d;return a.module||(a.module=function(){var a={};return function(e,d,f){if("hasOwnProperty"===e)throw b("badname","module");d&&a.hasOwnProperty(e)&&(a[e]=null);return a[e]||(a[e]=function(){function a(c,d,e){return function(){b[e||"push"]([c,d,arguments]);return g}}if(!d)throw c("nomod",e);var b=[],h=[],k=a("$injector","invoke"),g={_invokeQueue:b,_runBlocks:h,requires:d,name:e,provider:a("$provide","provider"),factory:a("$provide","factory"),service:a("$provide",
"service"),value:a("$provide","value"),constant:a("$provide","constant","unshift"),animation:a("$animateProvider","register"),filter:a("$filterProvider","register"),controller:a("$controllerProvider","register"),directive:a("$compileProvider","directive"),config:k,run:function(a){h.push(a);return this}};f&&k(f);return g}())}}())})(window)})(window);
//# sourceMappingURL=angular-loader.min.js.map
/*
AngularJS v1.2.28
(c) 2010-2014 Google, Inc. http://angularjs.org
License: MIT
*/
(function(H,a,A){'use strict';function D(p,g){g=g||{};a.forEach(g,function(a,c){delete g[c]});for(var c in p)!p.hasOwnProperty(c)||"$"===c.charAt(0)&&"$"===c.charAt(1)||(g[c]=p[c]);return g}var v=a.$$minErr("$resource"),C=/^(\.[a-zA-Z_$][0-9a-zA-Z_$]*)+$/;a.module("ngResource",["ng"]).factory("$resource",["$http","$q",function(p,g){function c(a,c){this.template=a;this.defaults=c||{};this.urlParams={}}function t(n,w,l){function r(h,d){var e={};d=x({},w,d);s(d,function(b,d){u(b)&&(b=b());var k;if(b&&
b.charAt&&"@"==b.charAt(0)){k=h;var a=b.substr(1);if(null==a||""===a||"hasOwnProperty"===a||!C.test("."+a))throw v("badmember",a);for(var a=a.split("."),f=0,c=a.length;f<c&&k!==A;f++){var g=a[f];k=null!==k?k[g]:A}}else k=b;e[d]=k});return e}function e(a){return a.resource}function f(a){D(a||{},this)}var F=new c(n);l=x({},B,l);s(l,function(h,d){var c=/^(POST|PUT|PATCH)$/i.test(h.method);f[d]=function(b,d,k,w){var q={},n,l,y;switch(arguments.length){case 4:y=w,l=k;case 3:case 2:if(u(d)){if(u(b)){l=
b;y=d;break}l=d;y=k}else{q=b;n=d;l=k;break}case 1:u(b)?l=b:c?n=b:q=b;break;case 0:break;default:throw v("badargs",arguments.length);}var t=this instanceof f,m=t?n:h.isArray?[]:new f(n),z={},B=h.interceptor&&h.interceptor.response||e,C=h.interceptor&&h.interceptor.responseError||A;s(h,function(a,b){"params"!=b&&("isArray"!=b&&"interceptor"!=b)&&(z[b]=G(a))});c&&(z.data=n);F.setUrlParams(z,x({},r(n,h.params||{}),q),h.url);q=p(z).then(function(b){var d=b.data,k=m.$promise;if(d){if(a.isArray(d)!==!!h.isArray)throw v("badcfg",
h.isArray?"array":"object",a.isArray(d)?"array":"object");h.isArray?(m.length=0,s(d,function(b){"object"===typeof b?m.push(new f(b)):m.push(b)})):(D(d,m),m.$promise=k)}m.$resolved=!0;b.resource=m;return b},function(b){m.$resolved=!0;(y||E)(b);return g.reject(b)});q=q.then(function(b){var a=B(b);(l||E)(a,b.headers);return a},C);return t?q:(m.$promise=q,m.$resolved=!1,m)};f.prototype["$"+d]=function(b,a,k){u(b)&&(k=a,a=b,b={});b=f[d].call(this,b,this,a,k);return b.$promise||b}});f.bind=function(a){return t(n,
x({},w,a),l)};return f}var B={get:{method:"GET"},save:{method:"POST"},query:{method:"GET",isArray:!0},remove:{method:"DELETE"},"delete":{method:"DELETE"}},E=a.noop,s=a.forEach,x=a.extend,G=a.copy,u=a.isFunction;c.prototype={setUrlParams:function(c,g,l){var r=this,e=l||r.template,f,p,h=r.urlParams={};s(e.split(/\W/),function(a){if("hasOwnProperty"===a)throw v("badname");!/^\d+$/.test(a)&&(a&&RegExp("(^|[^\\\\]):"+a+"(\\W|$)").test(e))&&(h[a]=!0)});e=e.replace(/\\:/g,":");g=g||{};s(r.urlParams,function(d,
c){f=g.hasOwnProperty(c)?g[c]:r.defaults[c];a.isDefined(f)&&null!==f?(p=encodeURIComponent(f).replace(/%40/gi,"@").replace(/%3A/gi,":").replace(/%24/g,"$").replace(/%2C/gi,",").replace(/%20/g,"%20").replace(/%26/gi,"&").replace(/%3D/gi,"=").replace(/%2B/gi,"+"),e=e.replace(RegExp(":"+c+"(\\W|$)","g"),function(a,c){return p+c})):e=e.replace(RegExp("(/?):"+c+"(\\W|$)","g"),function(a,c,d){return"/"==d.charAt(0)?d:c+d})});e=e.replace(/\/+$/,"")||"/";e=e.replace(/\/\.(?=\w+($|\?))/,".");c.url=e.replace(/\/\\\./,
"/.");s(g,function(a,e){r.urlParams[e]||(c.params=c.params||{},c.params[e]=a)})}};return t}])})(window,window.angular);
//# sourceMappingURL=angular-resource.min.js.map
/* Include this file in your html if you are using the CSP mode. */
@charset "UTF-8";
[ng\:cloak], [ng-cloak], [data-ng-cloak], [x-ng-cloak],
.ng-cloak, .x-ng-cloak,
.ng-hide {
display: none !important;
}
ng\:form {
display: block;
}
.ng-animate-block-transitions {
transition:0s all!important;
-webkit-transition:0s all!important;
}
/* show the element during a show/hide animation when the
* animation is ongoing, but the .ng-hide class is active */
.ng-hide-add-active, .ng-hide-remove {
display: block!important;
}
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
{% load staticfiles %}
<!DOCTYPE html>
<!--[if lt IE 7]> <html lang="en" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html lang="en" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]> <html lang="en" class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html lang="en" class="no-js">
<!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>Gargantext corpus annotations editor</title>
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{% static 'bower_components/bootstrap/dist/css/bootstrap.min.css' %}">
<link rel="stylesheet" href="{% static 'bower_components/angular/angular-csp.css' %}">
<link rel="stylesheet" href="{% static 'annotations/app.css' %}">
<script src="{% static 'bower_components/jquery/dist/jquery.min.js' %}"></script>
</head>
<body>
<!-- TODO integrate this later into the corpus.html django template -->
<div id="annotationsApp">
<div class="container-fluid">
<div class="row-fluid main-panel" ng-controller="IntraTextController">
<div class="col-md-4 col-xs-4 tabbable words-panel">
<ul class="nav nav-tabs">
<li class="active"><a href="#tab1" data-toggle="tab">Miamwords</a></li>
<li><a href="#tab2" data-toggle="tab">Local stopwords</a></li>
</ul>
<div class="tab-content">
<div class="tab-pane active" id="tab1">
<ul class="list-group words-list">
<div ng-if="extra_miamlist.length == 0" class="alert alert-info" role="alert">No extra-text miam-word yet</div>
<li ng-repeat="keyword in extra_miamlist | startFrom:currentMiamPage*pageSize | limitTo:pageSize" class="list-group-item">
<div ng-controller="ExtraAnnotationController" keyword-template class="keyword-container"></div>
</li>
</ul>
<nav ng-class="{invisible: numMiamPages() - 1 == 0}" class="clearfix">
<ul class="pagination pagination-s pull-right words-pagination">
<li ng-class="{disabled: currentMiamPage == 0}"><a ng-click="previousMiamPage()" class="glyphicon glyphicon-backward"></a></li>
<li ng-class="{disabled: currentMiamPage >= numMiamPages()-1}"><a ng-click="nextMiamPage()" class="glyphicon glyphicon-forward"></a></li>
</ul>
</nav>
<div class="form-group">
<input type="text" class="form-control" id="miamlist-input" ng-keypress="onMiamlistSubmit($event)">
<button type="submit" class="btn btn-default btn-primary" ng-click="onMiamlistSubmit($event)">Add</button>
</div>
</div>
<div class="tab-pane" id="tab2">
<ul class="list-group words-list clearfix">
<div ng-if="extra_stoplist.length == 0" class="alert alert-info" role="alert">No extra-text stop-word yet</div>
<li ng-repeat="keyword in extra_stoplist | startFrom:currentStopPage*pageSize | limitTo:pageSize" class="list-group-item"><div ng-controller="ExtraAnnotationController" keyword-template></div></li>
</ul>
<nav ng-class="{invisible: numStopPages() - 1 == 0}" class="clearfix">
<ul class="pagination pagination-s pull-right words-pagination">
<li ng-class="{disabled: currentStopPage == 0}"><a ng-click="previousStopPage()" class="glyphicon glyphicon-backward"></a></li>
<li ng-class="{disabled: currentStopPage >= numStopPages()-1}"><a ng-click="nextStopPage()" class="glyphicon glyphicon-forward"></a></li>
</ul>
</nav>
<div class="form-group">
<input type="text" class="form-control" id="stoplist-input" ng-keypress="onStoplistSubmit($event)">
<button type="submit" class="btn btn-default" ng-click="onStoplistSubmit($event)">Exclude</button>
</div>
</div>
</div>
</div>
<div class="col-md-8 col-xs-8 text-panel" ng-controller="DocController" id="document">
<div class="row-fluid clearfix">
<div class="col-md-7 col-xs-7">
<h3>{[{title}]}</h3>
</div>
<div class="col-md-5 col-xs-5">
<nav>
<ul class="pager">
<li ng-if="current_page_number > 1"><a ng-click="onPreviousClick()" href="#">Previous</a></li>
<li ng-if="current_page_number < last_page_number"><a ng-click="onNextClick()" href="#">Next</a></li>
</ul>
</nav>
</div>
</div>
<div class="row-fluid clearfix">
<ul class="breadcrumb">
<li>{[{authors}]}</li>
<li>{[{journal}]}</li>
<li class="active pull-right">{[{publication_date}]}</li>
</ul>
</div>
<h4>Abstract</h4>
<p id="abstract-text" class="text-container"></p>
<h4>Article</h4>
<p id="full-text" class="text-container"></p>
</div>
</div> <!-- end of the main row -->
</div>
<!-- this menu is over the text -->
<div ng-controller="AnnotationMenuController" id="selection" class="selection-menu" selection-template></div>
</div>
<!--[if lt IE 7]>
<p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p>
<![endif]-->
<script type="application/javascript">
window.STATIC_URL = "{% static '' %}";
window.ANNOTATION_API_URL = "{{ api_url }}";
</script>
<script src="{% static 'annotations/main.js' %}"></script>
</body>
</html>
from django.conf.urls import patterns, url
from annotations import views
# URL routes of the annotations app. Every pattern is relative to the prefix
# under which these urls are included by the project's root URLconf.
# Named groups (doc_id, list_id, ngram_id) are passed to the views as keyword
# arguments; they only match digits.
urlpatterns = patterns('',
# temporary demo page
url(r'^demo/$', views.demo),
url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
#url(r'^document/(?P<doc_id>[0-9]+)/ngrams/(?P<ngram_id>[0-9]+)$', views.DocumentNgram.as_view()), # actions on ngram from a document
url(r'^lists/(?P<list_id>[0-9]+)$', views.NgramList.as_view()), # actions on list filtered by document
# the "/<ngram_id>" suffix sits in an optional group, so ngram_id may be absent
url(r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$', views.Ngram.as_view()), # actions on ngram from a list optionally filtered by document
)
from urllib.parse import urljoin
import json
from django.shortcuts import render_to_response
from django.template import RequestContext
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer
from node.models import Node
from gargantext_web.db import *
def demo(request):
    """Render the temporary annotations demo page.

    The template context holds a single entry, ``api_url``, obtained by
    joining the host of the incoming request with the ``/annotations/`` path.
    """
    api_url = urljoin(request.get_host(), '/annotations/')
    template_context = {'api_url': api_url}
    return render_to_response(
        'annotations/demo.html',
        template_context,
        context_instance=RequestContext(request)
    )
class Document(APIView):
    """Read-only JSON view of a single document node."""
    renderer_classes = (JSONRenderer,)

    def get(self, request, doc_id):
        """Return selected hyperdata fields of the node whose id is ``doc_id``.

        The response carries the title, authors, journal, publication date,
        full text and abstract read from ``node.hyperdata`` (missing keys
        yield ``None``), the node id, and two hard-coded pagination
        placeholders.

        Raises ``Http404`` when no node has this id, instead of failing with
        an ``AttributeError`` on ``None``.
        """
        # Imported locally: Http404 is not among this module's top-level imports.
        from django.http import Http404

        node = session.query(Node).filter(Node.id == doc_id).first()
        if node is None:
            raise Http404('Document "%s" not found' % (doc_id, ))
        # TODO 404 if the node is not a Document
        hyperdata = node.hyperdata
        data = {
            'title': hyperdata.get('title'),
            'authors': hyperdata.get('authors'),
            'journal': hyperdata.get('journal'),
            'publication_date': hyperdata.get('publication_date'),
            'full_text': hyperdata.get('full_text'),
            'abstract_text': hyperdata.get('abstract'),
            'id': node.id,
            'current_page_number': 4,  # TODO remove, this is client side
            'last_page_number': 30  # TODO remove, this is client side
        }
        # return formatted result
        return Response(data)
class NgramList(APIView):
    """Annotations of one ngram list; only reading is implemented here."""
    renderer_classes = (JSONRenderer,)

    def get(self, request, list_id):
        """Return the annotations of list ``list_id`` for one document.

        The document id is read from the ``docId`` query parameter. The
        payload is a fixed sample (no database access yet), shaped as
        ``{list_id: {doc_id: [annotation, ...]}}`` with both ids rendered as
        strings.
        """
        doc_id = request.GET.get('docId')
        # TODO DB query
        # Sample rows: (uuid, text, category, level, occurrences)
        sample_rows = (
            ('1', 'what', 'stoplist', 'global', 1),
            ('2', 'rotations', 'miamlist', 'local', 1),
            ('3', 'etsy', 'stoplist', 'local', 1),
            ('4', 'employees', 'miamlist', 'local', 1),
            ('5', '2010', 'stoplist', 'global', 1),
            ('6', 'stoplist keyword', 'stoplist', 'local', 255),
            ('7', 'another stoplist keyword', 'stoplist', 'local', 23),
            ('8', 'dmc-gm5', 'miamlist', 'local', 1),
            ('9', 'scale of the GM-series', 'miamlist', 'local', 1),
            ('10', 'engineering rotations', 'miamlist', 'local', 1),
            ('11', 'pixel electronic viewfinder', 'miamlist', 'local', 1),
            ('12', 'viewfinder', 'miamlist', 'local', 1),
            ('13', 'pixel electronic', 'miamlist', 'local', 1),
            ('14', 'GM', 'miamlist', 'local', 1),
            ('15', 'support rotations', 'miamlist', 'local', 1),
            ('16', 'miamlist keyword', 'miamlist', 'local', 1),
            ('17', 'miamlist keyword', 'miamlist', 'local', 1),
            ('18', 'another miamlist keyword', 'miamlist', 'local', 3),
        )
        annotations = []
        for uuid, text, category, level, occurrences in sample_rows:
            annotations.append({
                'uuid': uuid,
                'text': text,
                'category': category,
                'level': level,
                'occurrences': occurrences
            })
        list_key = '%s' % list_id
        doc_key = '%s' % doc_id
        data = {list_key: {doc_key: annotations}}
        return Response(data)
class Ngram(APIView):
    """Read and Write Annotations"""
    renderer_classes = (JSONRenderer,)

    def delete(self, request, list_id, ngram_id):
        """
        TODO Delete one annotation by id
        associated with one Document (remove edge)

        Reads ``docId`` and ``annotationId`` from the query string and, for
        now, only prints the annotation id and answers with an empty object.
        """
        # not used yet; kept for the pending DB query
        doc_id = request.GET.get('docId')
        annotationId = request.GET.get("annotationId")
        # The original printed the undefined name ``annotationDict`` here,
        # which raised NameError on every DELETE request.
        print(annotationId)
        # TODO DB query
        return Response({})

    def post(self, request, list_id, ngram_id):
        """
        TODO update one annotation (document level)
        associated with one Document (add edge)

        Decodes the JSON carried by the ``annotation`` POST field, prints it
        and echoes it back. Raises ``ParseError`` (HTTP 400) when the field is
        missing or is not valid JSON.
        """
        # Imported locally: ParseError is not among this module's top-level imports.
        from rest_framework.exceptions import ParseError

        # not used yet; kept for the pending DB query
        doc_id = request.GET.get('docId')
        raw_annotation = request.POST.get("annotation")
        if raw_annotation is None:
            # json.loads(None) would fail with a TypeError (HTTP 500)
            raise ParseError('Missing POST parameter: "annotation"')
        try:
            annotationDict = json.loads(raw_annotation)
        except ValueError as error:
            raise ParseError('Invalid JSON in POST parameter "annotation": %s' % (error, ))
        print(annotationDict)
        # TODO DB query
        return Response(annotationDict)
from django.http import HttpResponseNotFound, HttpResponse, Http404
from django.http import HttpResponse, Http404
from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from django.db.models import Avg, Max, Min, Count, Sum
# from node.models import Language, ResourceType, Resource
# from node.models import Node, NodeType, Node_Resource, Project, Corpus
from sqlalchemy import text, distinct
from sqlalchemy import text, distinct, or_
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased
from gargantext_web.views import move_to_trash
from .db import *
from gargantext_web.db import *
from node import models
def DebugHttpResponse(data):
    """Wrap ``str(data)`` in a white-on-black ``<pre>`` page and return it as an HttpResponse."""
    page = '<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>' % (str(data), )
    return HttpResponse(page)
......@@ -49,7 +46,6 @@ _ngrams_order_columns = {
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.exceptions import APIException as _APIException
......@@ -69,7 +65,7 @@ _operators = {
">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value),
"in": lambda field, value: (field.in_(value)),
"in": lambda field, value: (or_(*tuple(field == x for x in value))),
"contains": lambda field, value: (field.contains(value)),
"startswith": lambda field, value: (field.startswith(value)),
}
......@@ -128,8 +124,10 @@ class NodesChildrenNgrams(APIView):
class NodesChildrenDuplicates(APIView):
def _fetch_duplicates(self, request, node_id, extra_columns=[], min_count=1):
def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
# input validation
if extra_columns is None:
extra_columns = []
if 'keys' not in request.GET:
raise APIException('Missing GET parameter: "keys"', 400)
keys = request.GET['keys'].split(',')
......@@ -194,7 +192,7 @@ class NodesChildrenDuplicates(APIView):
kept_node_ids_query = self._fetch_duplicates(request, node_id, [func.min(Node.id).label('id')], 0)
kept_node_ids = [kept_node.id for kept_node in kept_node_ids_query]
# TODO with new orm
duplicate_nodes = models.Node.objects.filter( parent_id=node_id ).exclude(id__in=kept_node_ids)
duplicate_nodes = models.Node.objects.filter( parent_id=node_id ).exclude(id__in=kept_node_ids)
# # delete the stuff
# delete_query = (session
# .query(Node)
......@@ -217,7 +215,7 @@ class NodesChildrenDuplicates(APIView):
class NodesChildrenMetatadata(APIView):
def get(self, request, node_id):
# query hyperdata keys
ParentNode = aliased(Node)
hyperdata_query = (session
......@@ -278,7 +276,7 @@ class NodesChildrenMetatadata(APIView):
class NodesChildrenQueries(APIView):
def _parse_filter(self, filter):
# validate filter keys
filter_keys = {'field', 'operator', 'value'}
if set(filter) != filter_keys:
......@@ -378,7 +376,7 @@ class NodesChildrenQueries(APIView):
raise APIException('In the query\'s "retrieve" parameter, a "list" should be provided as an array', 400)
if retrieve['type'] not in retrieve_types:
raise APIException('Unrecognized "type": "%s" in the query\'s "retrieve" parameter. Possible values are: "%s".' % (retrieve['type'], '", "'.join(retrieve_types), ), 400)
if retrieve['type'] == 'fields':
fields_names = ['id'] + retrieve['list'] if 'id' not in retrieve['list'] else retrieve['list']
elif retrieve['type'] == 'aggregates':
......@@ -455,14 +453,14 @@ class NodesChildrenQueries(APIView):
for filter in request.DATA.get('filters', []):
# parameters extraction & validation
field, operator, value = self._parse_filter(filter)
#
#
if field[0] == 'hyperdata':
# which hyperdata?
hyperdata = session.query(Hyperdata).filter(Hyperdata.name == field[1]).first()
if hyperdata is None:
hyperdata_query = session.query(Hyperdata.name).order_by(Hyperdata.name)
hyperdata_names = [hyperdata.name for hyperdata in hyperdata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(hyperdata_names), field[1]), 400)
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(hyperdata_names), field[1]), 400)
# check or create Node_Hyperdata alias; join if necessary
if hyperdata.id in hyperdata_aliases:
hyperdata_alias = hyperdata_aliases[hyperdata.id]
......@@ -480,14 +478,14 @@ class NodesChildrenQueries(APIView):
getattr(hyperdata_alias, 'value_' + hyperdata.type),
value
))
elif field[0] == 'ngrams':
elif field[0] == 'ngrams':
query = query.filter(
Node.id.in_(session
.query(Node_Ngram.node_id)
.filter(Node_Ngram.ngram_id == Ngram.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(operator(
getattr(Ngram, field[1]),
value
map(lambda x: x.replace('-', ' '), value)
))
)
)
......@@ -597,17 +595,17 @@ class Nodes(APIView):
# it should take the subnodes into account as well,
# for better constistency...
def delete(self, request, node_id):
user = request.user
node = session.query(Node).filter(Node.id == node_id).first()
msgres = str()
try:
move_to_trash(node_id)
msgres = node_id+" moved to Trash"
except Exception as error:
msgres ="error deleting : " + node_id + str(error)
......@@ -632,7 +630,7 @@ class CorpusController:
# raise Http403("Unauthorized access.")
return corpus
@classmethod
def ngrams(cls, request, node_id):
......@@ -667,5 +665,3 @@ class CorpusController:
)
else:
raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
from gargantext_web import settings
from node import models
from django.conf import settings
from node import models
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor']
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor', 'User']
# initialize sqlalchemy
......@@ -14,9 +14,16 @@ from sqlalchemy import create_engine, MetaData, Table, Column, ForeignKey
from sqlalchemy.types import Integer, String, DateTime
from sqlalchemy.dialects.postgresql import JSON
engine = create_engine('postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
**settings.DATABASES['default']
))
# SQLAlchemy session management
def get_engine():
    """Create the SQLAlchemy engine for Django's ``default`` database.

    The connection URL is assembled from the USER, PASSWORD, HOST, PORT and
    NAME entries of ``settings.DATABASES['default']`` and uses the
    ``postgresql+psycopg2`` dialect. The engine is created with
    ``use_native_hstore=True``.
    """
    from sqlalchemy import create_engine
    # work on a copy so the Django settings are left untouched
    db_settings = dict(settings.DATABASES['default'])
    # PORT may be empty or absent in the settings; in that case leave the
    # ":port" part out of the URL rather than producing "host:/name" or
    # failing with a KeyError.
    port = db_settings.get('PORT')
    db_settings['PORT'] = ':%s' % (port, ) if port else ''
    url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}{PORT}/{NAME}'.format(
        **db_settings
    )
    return create_engine(url, use_native_hstore=True)
engine = get_engine()
Base = automap_base()
Base.prepare(engine, reflect=True)
......@@ -112,7 +119,7 @@ def literalquery(statement, dialect=None):
return str(value)
elif isinstance(value, datetime):
return repr(str(value))
else:
else:
if isinstance(value, str):
return value.encode('UTF-8')
else:
......@@ -121,17 +128,6 @@ def literalquery(statement, dialect=None):
return LiteralCompiler(dialect, statement)
# SQLAlchemy session management
# Build the SQLAlchemy engine for Django's 'default' database.
# The connection URL is formatted from the USER, PASSWORD, HOST and NAME
# entries of settings.DATABASES['default']; this version puts no port in it.
# Returns the result of create_engine(), called with use_native_hstore=True.
def get_engine():
# imported here, when the function is called
from sqlalchemy import create_engine
url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
**settings.DATABASES['default']
)
return create_engine(url, use_native_hstore=True)
engine = get_engine()
def get_sessionmaker():
from sqlalchemy.orm import sessionmaker
return sessionmaker(bind=engine)
......@@ -199,6 +195,7 @@ def get_cursor():
'user': db_settings['USER'],
'password': db_settings['PASSWORD'],
'host': db_settings['HOST'],
'port': db_settings['PORT']
})
return db, db.cursor()
......@@ -232,4 +229,3 @@ class bulk_insert:
return ''
readline = read
......@@ -10,17 +10,17 @@ def paragraph_lorem(size_target=450):
Function that returns paragraph with false latin language.
size_target is the number of random words that will be given.
'''
lorem = random_words.LoremIpsum()
sentences_list = lorem.get_sentences_list(sentences=5)
paragraph_size = 0
while paragraph_size < size_target :
sentences_list.append(lorem.get_sentence())
paragraph = ' '.join(sentences_list)
paragraph_size = len(paragraph)
return(paragraph)
......@@ -29,48 +29,48 @@ def paragraph_gargantua(size_target=500):
Function that returns paragraph with chapter titles of Gargantua.
size_target is the number of random words that will be given.
'''
paragraph = list()
paragraph_size = 0
chapter_number = 1
while paragraph_size < size_target and chapter_number < 6:
chapitre = open('/srv/gargantext/static/docs/gargantua_book/gargantua_chapter_' + str(chapter_number) + '.txt', 'r')
paragraph.append(random.choice(chapitre.readlines()).strip())
chapitre.close()
paragraph_size = len(' '.join(paragraph))
chapter_number += 1
return(' '.join(paragraph))
def random_letter(mot, size_min=5):
    '''
    Shuffle the inner letters of a word longer than size_min.

    The first and last round(len(mot) / pi) letters stay in place; the
    letters in between are put in random order. A word whose length is
    not greater than size_min is returned unchanged.
    '''
    if len(mot) <= size_min:
        return mot

    edge = round(len(mot) / pi)
    middle = list(mot[edge:-edge])
    random.shuffle(middle)
    pieces = [mot[:edge]] + middle + [mot[-edge:]]
    return ''.join(pieces)
......@@ -86,4 +86,3 @@ def paragraph_tutoreil(tutoriel=tutoriel):
paragraph = ' '.join([ random_letter(mot) for mot in tutoriel.split(" ")]) \
+ ": comptexter avec Gargantext."
return(paragraph)
......@@ -10,6 +10,7 @@ https://docs.djangoproject.com/en/1.6/ref/settings/
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
PROJECT_PATH = os.path.join(BASE_DIR, os.pardir)
PROJECT_PATH = os.path.abspath(PROJECT_PATH)
......@@ -60,12 +61,6 @@ TEMPLATE_DIRS = (
# Always use forward slashes
# Don't forget to use absolute paths, not relative paths.
'/srv/gargantext/templates',
#import os.path
#
#TEMPLATE_DIRS = (
# os.path.join(os.path.dirname(__file__), 'templates').replace('\\','/'),
#)
)
......@@ -99,6 +94,7 @@ INSTALLED_APPS = (
'cte_tree',
'node',
'ngram',
'annotations',
'scrappers.scrap_pubmed',
'djcelery',
'aldjemy',
......@@ -194,3 +190,9 @@ TEMPLATE_CONTEXT_PROCESSORS = (
# grappelli custom
GRAPPELLI_ADMIN_TITLE = "Gargantext"
# Optional overlay of machine-specific settings: when DEBUG is True or the
# GARGANTEXT_DEBUG environment variable is set, every public name of
# gargantext_web.local_settings is imported over the values defined above.
# A missing local_settings module is ignored on purpose.
if DEBUG is True or 'GARGANTEXT_DEBUG' in os.environ:
try:
from gargantext_web.local_settings import *
except ImportError:
pass
......@@ -4,7 +4,7 @@ from django.contrib import admin
from django.contrib.auth.views import login
from gargantext_web import views, views_optimized
from annotations import urls as annotations_urls
import gargantext_web.api
import scrappers.scrap_pubmed.views as pubmedscrapper
......@@ -14,7 +14,7 @@ import tests.ngramstable.views as samtest
admin.autodiscover()
urlpatterns = patterns('',
# Admin views
url(r'^admin/', include(admin.site.urls)),
url(r'^login/', include(admin.site.urls)),
......@@ -22,21 +22,21 @@ urlpatterns = patterns('',
url(r'^auth/$', views.login_user),
url(r'^auth/logout/$', views.logout_user),
# Dynamic CSS
url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css),
# User Home view
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^maintenance/', views.get_maintenance),
# Project Management
url(r'^projects/$', views.projects),
url(r'^project/(\d+)/$', views_optimized.project),
url(r'^delete/(\d+)$', views.delete_node), # => api.node('id' = id, children = 'True', copies = False)
# Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
......@@ -46,15 +46,13 @@ urlpatterns = patterns('',
url(r'^project/(\d+)/corpus/(\d+)/chart$', views.chart),
url(r'^project/(\d+)/corpus/(\d+)/explorer$', views.graph),
url(r'^project/(\d+)/corpus/(\d+)/matrix$', views.matrix),
# Data management
url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv), # => api.node.children('type' : 'data', 'format' : 'csv')
url(r'^corpus/(\d+)/node_link.json$', views.node_link), # => api.analysis('type': 'node_link', 'format' : 'json')
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), # => api.analysis('type': 'adjacency', 'format' : 'json')
url(r'^api/tfidf/(\d+)/(\w+)$', views_optimized.tfidf),
# url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
url(r'^api/tfidf2/(\d+)/(\w+)$', views.tfidf2),
# Data management
#url(r'^api$', gargantext_web.api.Root), # = ?
......@@ -68,8 +66,10 @@ urlpatterns = patterns('',
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^annotations/', include(annotations_urls)),
# Provisory tests
url(r'^ngrams$', views.ngrams), # to be removed
url(r'^ngrams$', views.ngrams), # to be removed
url(r'^nodeinfo/(\d+)$', views.nodeinfo), # to be removed ?
url(r'^tests/mvc$', views.tests_mvc),
url(r'^tests/mvc-listdocuments$', views.tests_mvc_listdocuments),
......@@ -100,12 +100,10 @@ if settings.MAINTENANCE:
urlpatterns = patterns('',
url(r'^img/logo.svg$', views.logo),
url(r'^css/bootstrap.css$', views.css),
url(r'^$', views.home_view),
url(r'^about/', views.get_about),
url(r'^admin/', include(admin.site.urls)),
url(r'^.*', views.get_maintenance),
)
This diff is collapsed.
......@@ -4,7 +4,7 @@ Install the requirements
1) Install all the Debian packages listed in dependances.deb
(also: sudo apt-get install postgresql-contrib)
2) Create a Pythton virtual enironnement
2) Create a Python virtual environment
On Debian:
---------
......
......@@ -26,7 +26,6 @@ from gargantext_web.db import *
# print('Empty table "%s"...' % (table._meta.db_table, ))
# table.objects.all().delete()
# Integration: hyperdata types
print('Initialize hyperdata...')
......@@ -43,8 +42,8 @@ hyperdata = {
'doi': 'string',
'journal': 'string',
}
for name, type in hyperdata.items():
models.Hyperdata(name=name, type=type).save()
for name, type_name in hyperdata.items():
models.Hyperdata(name=name, type=type_name).save()
# Integration: languages
......@@ -66,7 +65,7 @@ for language in pycountry.languages:
print('Initialize users...')
me = models.User.objects.get_or_create(username='alexandre')
gargantua = models.User.objects.get_or_create(username='gargantua')
gargantua, created = models.User.objects.get_or_create(username='gargantua')
node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
......@@ -82,9 +81,9 @@ print('Initialize node types...')
node_types = [
'Root', 'Trash',
'Project', 'Corpus', 'Document',
'Stem', 'Lem', 'Tfidf',
'Synonym',
'Project', 'Corpus', 'Document',
'Stem', 'Lem', 'Tfidf',
'Synonym',
'MiamList', 'StopList',
'Cooccurrence', 'WhiteList', 'BlackList'
]
......
......@@ -2,7 +2,6 @@
# TODO do apt-get install --force-yes --force-yes
apt-get install --force-yes postgresql
apt-get install --force-yes postgresql-contrib
apt-get install --force-yes rabbitmq-server
......@@ -40,7 +39,7 @@ apt-get install --force-yes liblapack-dev
#nlpserver
apt-get install --force-yes libgflags-dev
aptitude install --force-yes libgoogle-glog-dev
apt-get install --force-yes libgoogle-glog-dev
# MElt
# soon
......
......@@ -7,7 +7,7 @@ Pygments==1.6
RandomWords==0.1.12
SQLAlchemy==0.9.9
South==1.0
aldjemy==0.3.10
-e git+https://github.com/mathieurodic/aldjemy.git@master#egg=aldjemy
amqp==1.4.6
anyjson==0.3.3
bibtexparser==0.6.0
......@@ -36,11 +36,12 @@ djangorestframework==3.0.0
gensim==0.10.3
graphviz==0.4
ipython==2.2.0
jedi==0.9.0
kombu==3.0.24
lxml==3.4.1
#matplotlib==1.4.0
matplotlib==1.4.0
networkx==1.9
#nltk==3.0a4
nltk==3.0a4
nose==1.3.4
numpy==1.8.2
pandas==0.14.1
......
......@@ -58,7 +58,6 @@ class MeltTagger(Tagger):
self._pos_tagger.load_lexicon('%s/%s/lexicon.json' % (path, language))
self._pos_tagger.load_model('%s/%s' % (path, language))
self._preprocessing_commands = (
# ('/usr/local/bin/clean_noisy_characters.sh', ),
('%s/MElt_normalizer.pl' % path, '-nc', '-c', '-d', '%s/%s' % (path, language), '-l', language, ),
('%s/segmenteur.pl' % path, '-a', '-ca', '-af=%s/pctabr' % path, '-p', 'r'),
)
......@@ -93,15 +92,16 @@ class MeltTagger(Tagger):
tagged_tokens = self._pos_tagger.tag_token_sequence(tokens)
for token in tagged_tokens:
if len(token.string):
yield (token.string, _tag_replacements[token.label], )
yield (token.string, token.label, )
def tag_text(self, text, lemmatize=True):
tagged_tokens = self._tag(text)
# without lemmatization
if not lemmatize:
for tagged_token in tagged_tokens:
yield tagged_token
for form, tag in tagged_tokens:
yield (form, _tag_replacements[tag])
return
# lemmatization
# with lemmatization
command_input = ' '.join(
'%s/%s' % (token, tag)
for token, tag in tagged_tokens
......@@ -110,4 +110,4 @@ class MeltTagger(Tagger):
for token in lemmatized.split():
if len(token):
values = token.split('/')
yield (values[0], values[1], values[2].replace('*', ''))
yield (values[0], _tag_replacements[values[1]], values[2].replace('*', ''))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment