Merge branch 'unstable' of ssh://delanoe.org:1979/gargantext into samuel

b7d18edb · PkSM3 · 00afa364 · daf4e9d3 · b7d18edb · b7d18edb
Commit b7d18edb authored Jan 04, 2016 by PkSM3
13 changed files
--- a/annotations/static/annotations/document.js
+++ b/annotations/static/annotations/document.js
@@ -2,10 +2,13 @@
  'use strict';

  var annotationsAppDocument = angular.module('annotationsAppDocument', ['annotationsAppHttp']);
-
  annotationsAppDocument.controller('DocController',
    ['$scope', '$rootScope', '$timeout', 'NgramListHttpService', 'DocumentHttpService',
    function ($scope, $rootScope, $timeout, NgramListHttpService, DocumentHttpService) {
+
+      // dataLoading = flag telling the template to show the "loading" indicator
+      $scope.dataLoading = true ;
+
      $rootScope.documentResource = DocumentHttpService.get(
        {'docId': $rootScope.docId},
        function(data, responseHeaders) {
@@ -27,6 +30,7 @@
            function(data) {
              $rootScope.annotations = data[$rootScope.corpusId.toString()][$rootScope.docId.toString()];
              $rootScope.lists = data[$rootScope.corpusId.toString()].lists;
+              $scope.dataLoading = false ;
            },
            function(data) {
              console.error("unable to get the list of ngrams");
@@ -34,6 +38,7 @@
          );

      });
+
      // TODO setup article pagination
      $scope.onPreviousClick = function () {
        DocumentHttpService.get($scope.docId - 1);

--- a/annotations/templates/annotations/main.html
+++ b/annotations/templates/annotations/main.html
@@ -86,6 +86,16 @@
                <li class="list-group-item small"><span class="badge">date</span>{[{publication_date}]}</li>
              </ul>
            </div>
+
+            <div ng-if="dataLoading">
+                Loading text...
+                <br>
+                  <center>
+                    <img width="10%" src="{% static 'img/ajax-loader.gif'%}"></img>
+                  </center>
+                <br>
+            </div>
+
            <div ng-if="abstract_text != null">
                <span class="badge">abstract</span>
            </div>

--- a/annotations/urls.py
+++ b/annotations/urls.py
@@ -2,9 +2,15 @@ from django.conf.urls import patterns, url
 from annotations import views


+# /!\ URL patterns here are *without* the trailing slash
+
 urlpatterns = patterns('',
+    # JSON fields: title, id, authors, journal,
+    #              publication_date,
+    #              abstract_text, full_text
    url(r'^document/(?P<doc_id>[0-9]+)$', views.Document.as_view()), # document view
    url(r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$', views.NgramList.as_view()), # the list associated with an ngram
    url(r'^lists/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$', views.NgramEdit.as_view()),
+    # POST (fixed 2015-12-16)
    url(r'^lists/(?P<list_id>[0-9]+)/ngrams/create$', views.NgramCreate.as_view()), #
 )
--- a/annotations/views.py
+++ b/annotations/views.py
@@ -13,7 +13,7 @@ from rest_framework.exceptions import APIException
 from rest_framework.authentication import SessionAuthentication, BasicAuthentication

 from node.models import Node
-from gargantext_web.db import session, cache, Node, NodeNgram
+from gargantext_web.db import session, cache, Node, NodeNgram, Ngram
 from ngram.lists import listIds, listNgramIds
 from gargantext_web.db import get_or_create_node

@@ -138,6 +138,8 @@ class NgramCreate(APIView):
    def post(self, request, list_id):
        """
        create NGram in a given list
+        
+        example: request.data = {'text': 'phylogeny'}
        """
        list_id = int(list_id)
        # format the ngram's text
@@ -161,6 +163,7 @@ class NgramCreate(APIView):
        ngram_id = ngram.id
        # create the new node_ngram relation
        # TODO check existing Node_Ngram ?
+        # TODO: indexing should happen here
        node_ngram = NodeNgram(node_id=list_id, ngram_id=ngram_id, weight=1.0)
        session.add(node_ngram)
        session.commit()

--- a/exec.py
+++ b/exec.py
@@ -11,8 +11,10 @@ from ngram.mapList import compute_mapList
 from gargantext_web.db import NodeNgram
 from admin.utils import WorkflowTracking
 from ngram.importExport import exportNgramList, importNgramList
+from analysis.periods import phylo_clusters
+
+from ngram.occurrences import compute_occs

-    

 def ngram_workflow(corpus, n=5000):
    '''
@@ -50,13 +52,17 @@ def ngram_workflow(corpus, n=5000):
 #    update_state.processing_(corpus, "TF-IDF local score")
 #    compute_tfidf(corpus)
    # update_state.processing_(corpus, "OCCS local score")
-    # compute_occs(corpus)
-    update_state.processing_(corpus, "0")
+    compute_occs(corpus)
+    #update_state.processing_(corpus, "0")

 if __name__ == "__main__":
    node_id = sys.argv[1] 
    corpus=session.query(Node).filter(Node.id==node_id).first()
-    exportNgramList(corpus, "list.csv")
-    #importNgramList(corpus, "list.csv")
+    
    #ngram_workflow(corpus)
+    
+    #exportNgramList(corpus, "list.csv")
+    #importNgramList(corpus, "list.csv")
+
+    phylo_clusters(corpus, range(2012,2016))

--- a/init/stop_lists/en.txt
+++ b/init/stop_lists/en.txt
-
 
 -

@@ -462,3 +461,16 @@ your
 yours
 yourself
 yourselves
+Francis group
+© taylor
+copyright taylor
+copyright © taylor
+copyright ©
+springer science+business media
+sage publications
+oxford university press
+© springer international publishing switzerland
+john wiley
+elsevier ltd
+© fpi
+elsevier inc
--- a/init_accounts.py
+++ b/init_accounts.py
@@ -14,16 +14,18 @@ def notify_user(username, email, password):
    Votre login est: %s
    Votre mot de passe est : %s

-    Nous restons votre disposition pour tout complément d'information.
+    En janvier prochain, il y aura une formation Gargantext (gratuite).
+    Inscription obligatoire pour les dernière places:
+    http://iscpif.fr/event/formation-gargantext/
+
+    Nous restons à votre disposition pour tout complément d'information.
    Cordialement
    --
        L'équipe de Gargantext (CNRS)

    ''' % (username, password)

-    send_mail('[Gargantext] Votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], fail_silently=False )
-    #send_mail('[Gargantext] Votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], ['alexandre@delanoe.org'] )
-
+    send_mail('[Gargantext] Votre accès à la plateforme', message, 'alexandre.delanoe@iscpif.fr', [email], fail_silently=False )

    # add option for mass sending email


--- a/ngram/importExport.py
+++ b/ngram/importExport.py
--- a/parsing/FileParsers/EuropressFileParser.py
+++ b/parsing/FileParsers/EuropressFileParser.py
--- a/parsing/FileParsers/IsiFileParser.py
+++ b/parsing/FileParsers/IsiFileParser.py
@@ -15,6 +15,7 @@ class IsiFileParser(RisFileParser):
            b"TI":  {"type": "hyperdata", "key": "title", "separator": " "},
            b"AU":  {"type": "hyperdata", "key": "authors", "separator": ", "},
            b"DI":  {"type": "hyperdata", "key": "doi"},
+            b"SO":  {"type": "hyperdata", "key": "journal"},
            b"PY":  {"type": "hyperdata", "key": "publication_year"},
            b"PD":  {"type": "hyperdata", "key": "publication_month"},
            b"LA":  {"type": "hyperdata", "key": "language_fullname"},

--- a/parsing/FileParsers/RisFileParser.py
+++ b/parsing/FileParsers/RisFileParser.py
@@ -19,6 +19,7 @@ class RisFileParser(FileParser):
            b"TI":  {"type": "hyperdata", "key": "title", "separator": " "},
            b"ST":  {"type": "hyperdata", "key": "subtitle", "separator": " "},
            b"AU":  {"type": "hyperdata", "key": "authors", "separator": ", "},
+            b"T2":  {"type": "hyperdata", "key": "journal"},
            b"UR":  {"type": "hyperdata", "key": "doi"},
            b"PY":  {"type": "hyperdata", "key": "publication_year"},
            b"PD":  {"type": "hyperdata", "key": "publication_month"},

--- a/parsing/FileParsers/ZoteroFileParser.py
+++ b/parsing/FileParsers/ZoteroFileParser.py
@@ -12,6 +12,7 @@ class ZoteroFileParser(RisFileParser):
            b"ER":  {"type": "delimiter"},
            b"TI":  {"type": "hyperdata", "key": "title", "separator": " "},
            b"AU":  {"type": "hyperdata", "key": "authors", "separator": ", "},
+            b"T2":  {"type": "hyperdata", "key": "journal"},
            b"UR":  {"type": "hyperdata", "key": "doi"},
            b"DA":  {"type": "hyperdata", "key": "publication_date_to_parse"},
            b"PY":  {"type": "hyperdata", "key": "publication_year"},

--- a/parsing/NgramsExtractors/NgramsExtractor.py
+++ b/parsing/NgramsExtractors/NgramsExtractor.py
 # from ..Taggers import NltkTagger
 from ..Taggers import TurboTagger
 import nltk
+from re import sub


 """Base class for all ngrams extractors.
@@ -33,9 +34,21 @@ class NgramsExtractor:
    Returns a list of the ngrams found in the given text.
    """
    def extract_ngrams(self, contents):
-        tagged_tokens = list(self.tagger.tag_text(contents))
+        clean_contents = self._prepare_text(contents)
+
+        # tagging happens here
+        tagged_tokens = list(self.tagger.tag_text(clean_contents))
+
        if len(tagged_tokens):
            grammar_parsed = self._grammar.parse(tagged_tokens)
            for subtree in grammar_parsed.subtrees():
                if subtree.label() == self._label:
                    yield subtree.leaves()
+
+    @staticmethod
+    def _prepare_text(text_contents):
+        """
+        Clean the text for better POS tagging
+        """
+        # strip XML-like tags (at most 45 characters between "<" and ">")
+        return sub(r"<[^>]{0,45}>","",text_contents)