WIP

5b150ca3 · sim · 226dc26e · 5b150ca3 · 5b150ca3 · 5b150ca3
Commit 5b150ca3 authored Jul 20, 2017 by sim
9 changed files
--- a/alembic/versions/0b9b69af66c3_put_resourcetypes_in_database.py
+++ b/alembic/versions/0b9b69af66c3_put_resourcetypes_in_database.py
+"""Put resourcetypes in database
+
+Revision ID: 0b9b69af66c3
+Revises: bedce47c9e34
+Create Date: 2017-07-18 16:54:07.581421
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import gargantext
+
+
+# revision identifiers, used by Alembic.
+revision = '0b9b69af66c3'       # same value as "Revision ID" in the module docstring
+down_revision = 'bedce47c9e34'  # same value as "Revises" in the module docstring
+branch_labels = None
+depends_on = None
+
+
+# Resource types inserted by upgrade(). Every entry carries the same four
+# keys: "name", "label", "parser" and "crawler" ("crawler" is None for the
+# entries that declare no crawler).
+RESOURCETYPES = [
+    {
+        "name": "EUROPRESSE",
+        "label": "Europresse",
+        "parser": "EuropresseParser",
+        "crawler": None,
+    },
+    {
+        "name": "JSTOR",
+        "label": "Jstor [RIS]",
+        "parser": "RISParser",
+        "crawler": None,
+    },
+    {
+        "name": "PUBMED",
+        "label": "Pubmed [XML]",
+        "parser": "PubmedParser",
+        "crawler": "PubmedCrawler",
+    },
+    {
+        "name": "SCOPUS",
+        "label": "Scopus [RIS]",
+        "parser": "RISParser",
+        "crawler": None,
+    },
+    {
+        "name": "WOS",
+        "label": "Web of Science [ISI]",
+        "parser": "ISIParser",
+        "crawler": None,
+    },
+    {
+        "name": "ZOTERO",
+        "label": "Zotero [RIS]",
+        "parser": "RISParser",
+        "crawler": None,
+    },
+    {
+        "name": "CSV",
+        "label": "CSV",
+        "parser": "CSVParser",
+        "crawler": None,
+    },
+    {
+        "name": "ISTEX",
+        "label": "ISTex",
+        "parser": "ISTexParser",
+        "crawler": None,
+    },
+    {
+        "name": "SCOAP",
+        "label": "SCOAP [API/XML]",
+        "parser": "CernParser",
+        "crawler": "CernCrawler",
+    },
+    {
+        "name": "REPEC",
+        "label": "REPEC [MULTIVAC API]",
+        "parser": "MultivacParser",
+        "crawler": "MultivacCrawler",
+    },
+    {
+        "name": "HAL",
+        "label": "HAL [API]",
+        "parser": "HalParser",
+        "crawler": "HalCrawler",
+    },
+    {
+        "name": "ISIDORE",
+        # The backslash is escaped explicitly: a bare "\ " is an invalid escape
+        # sequence. The resulting label text is unchanged.
+        "label": "ISIDORE [SPARQLE API /!\\ BETA]",
+        "parser": "IsidoreParser",
+        "crawler": "IsidoreCrawler",
+    },
+]
+
+
+def upgrade():
+    """Insert one row per entry of RESOURCETYPES through ``op.bulk_insert``.
+
+    Each row gets ``parent_id`` set to ``gargantua`` and ``hyperdata`` set to
+    the RESOURCETYPES entry itself.
+
+    NOTE(review): as written this cannot run. Neither ``ResourceTypeNode`` nor
+    ``gargantua`` is defined or imported in this module (only ``op``, ``sa``
+    and ``gargantext`` are), so the call raises NameError.
+    NOTE(review): Alembic documents the first argument of ``op.bulk_insert``
+    as a table object -- confirm whether a mapped class is acceptable here.
+    TODO: resolve both points before this revision is applied.
+    """
+    op.bulk_insert(ResourceTypeNode, [
+        {"parent_id": gargantua, "hyperdata": x} for x in RESOURCETYPES
+    ])
+
+
+def downgrade():
+    """Do nothing: this revision defines no downgrade step."""
+    return None
--- a/gargantext/constants.py
+++ b/gargantext/constants.py
@@ -30,7 +30,7 @@ from .settings import BASE_DIR

 # XXX Originally defined here, imported here for backward-compatibility,
 #     should be removed later.
-from .models.nodes_constants import NODETYPES, LISTTYPES, INDEXED_HYPERDATA, \
+from .models.nodes_constants import LISTTYPES, INDEXED_HYPERDATA, \
                                    RESOURCETYPES, get_resource, get_resource_by_name, \
                                    load_parser, load_crawler


--- a/gargantext/models/nodes.py
+++ b/gargantext/models/nodes.py
@@ -6,10 +6,10 @@ from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index,
                  Integer, Float, String, DateTime, JSONB, \
                  MutableList, MutableDict
 from .users import User
-from .nodes_constants import NODETYPES, LISTTYPES, INDEXED_HYPERDATA
+from .nodes_constants import LISTTYPES, INDEXED_HYPERDATA

-__all__ = ['NODETYPES', 'LISTTYPES', 'INDEXED_HYPERDATA',
-           'Node', 'NodeNode', 'CorpusNode']
+__all__ = ['LISTTYPES', 'INDEXED_HYPERDATA',
+           'Node', 'NodeType', 'NodeNode', 'CorpusNode']


 class NodeType(TypeDecorator):
@@ -19,11 +19,49 @@ class NodeType(TypeDecorator):
    """
    impl = Integer

-    def process_bind_param(self, typename, dialect):
-        return NODETYPES.index(typename)
-
-    def process_result_value(self, typeindex, dialect):
-        return NODETYPES[typeindex]
+    NODETYPES = {  # integer id -> type name
+        # Documents hierarchy
+        1: 'USER',
+        2: 'PROJECT',
+        3: 'CORPUS',
+        4: 'DOCUMENT',
+        19: 'RESOURCE',
+        20: 'RESOURCETYPE',
+        # Lists
+        5: 'STOPLIST',
+        6: 'GROUPLIST',
+        7: 'MAINLIST',
+        8: 'MAPLIST',
+        9: 'COOCCURRENCES',
+        # Scores
+        10: 'OCCURRENCES',
+        11: 'SPECCLUSION',
+        18: 'GENCLUSION',
+        12: 'CVALUE',
+        13: 'TFIDF-CORPUS',
+        14: 'TFIDF-GLOBAL',
+        # more scores (sorry!)
+        16: 'TIRANK-LOCAL',
+        17: 'TIRANK-GLOBAL',
+        # Docs subset
+        15: 'FAVORITES',
+    }
+    NODETYPES_BY_NAME = {name: type_id for type_id, name in NODETYPES.items()}  # reverse map
+
+    def process_bind_param(self, typename, dialect=None):
+        return self.NODETYPES_BY_NAME.get(typename)  # type name -> integer id; None when the name is unknown
+
+    def process_result_value(self, typeindex, dialect=None):
+        return self.NODETYPES.get(typeindex)  # integer id -> type name; None when the id is unknown
+
+    @classmethod
+    def all(cls):
+        return cls.NODETYPES.values()  # dict view over every known type name (not a list)
+
+    @classmethod
+    def get(cls, x):
+        convert = cls.process_result_value if isinstance(x, int) else cls.process_bind_param
+        return convert(cls, x)  # int -> type name, anything else -> id; the class stands in for `self`


 class Node(Base):
@@ -244,6 +282,15 @@ class CorpusNode(Node):
        ))


+class ResourceTypeNode(Node):
+    """Node subclass registered under the 'RESOURCETYPE' polymorphic identity."""
+    __mapper_args__ = {'polymorphic_identity': 'RESOURCETYPE'}
+
+    def test(self):
+        """Do nothing and return None (empty method body)."""
+        return None
+
+
 class NodeNode(Base):
    __tablename__ = 'nodes_nodes'
    __table_args__ = (
@@ -272,7 +319,7 @@ class NodeNode(Base):
 _ALREADY_IMPLEMENTED_NODE_TYPES = \
    set(cls.__mapper_args__.get('polymorphic_identity') for cls in Node.__subclasses__())

-for nodetype in NODETYPES:
+for nodetype in NodeType.all():
    if nodetype and nodetype not in _ALREADY_IMPLEMENTED_NODE_TYPES:
        # Convert nodetype to a CamelCase class name, assuming it's possible...
        class_name = ''.join(nodetype.title().split("-")) + 'Node'

--- a/gargantext/models/nodes_constants.py
+++ b/gargantext/models/nodes_constants.py
@@ -30,38 +30,6 @@ from ..util.dates import datetime, convert_to_datetime

 # types & models (nodes, lists, hyperdata, resource) ---------------------------------------------

-NODETYPES = [
-    # TODO separate id not array index, read by models.node
-    None,                    # 0
-    # documents hierarchy
-    'USER',                  # 1
-    'PROJECT',               # 2
-    #RESOURCE should be here but last
-    'CORPUS',                # 3
-    'DOCUMENT',              # 4
-    # lists
-    'STOPLIST',              # 5
-    'GROUPLIST',             # 6
-    'MAINLIST',              # 7
-    'MAPLIST',               # 8
-    'COOCCURRENCES',         # 9
-    # scores
-    'OCCURRENCES',           # 10
-    'SPECCLUSION',           # 11
-    'CVALUE',                # 12
-    'TFIDF-CORPUS',          # 13
-    'TFIDF-GLOBAL',          # 14
-    # docs subset
-    'FAVORITES',             # 15
-    # more scores (sorry!)
-
-    'TIRANK-LOCAL',          # 16
-    'TIRANK-GLOBAL',         # 17
-
-    'GENCLUSION',            # 18
-    'RESOURCE',              # 19
-]
-
 LISTTYPES = {
    'DOCUMENT'     : WeightedList,
    'GROUPLIST'    : Translations,   # todo remove "LIST" from name

--- a/gargantext/util/toolchain/ngram_coocs.py
+++ b/gargantext/util/toolchain/ngram_coocs.py
@@ -7,8 +7,9 @@ from sqlalchemy                import exc
 from gargantext.util.lists     import WeightedMatrix
 from gargantext.util.db        import get_engine
 from gargantext.util.db_cache  import cache
-from gargantext.constants      import DEFAULT_COOC_THRESHOLD, NODETYPES
+from gargantext.constants      import DEFAULT_COOC_THRESHOLD
 from gargantext.constants      import INDEXED_HYPERDATA
+from gargantext.models         import NodeType

 def compute_coocs(  corpus,
                    overwrite_id    = None,
@@ -101,7 +102,7 @@ def compute_coocs(  corpus,
        --    ==
        -- GROUP BY ngA, ngB
        )
-        """.format( nodetype_id = NODETYPES.index('DOCUMENT')
+        """.format( nodetype_id = NodeType.get('DOCUMENT')
                  , corpus_id=corpus.id
                  )


--- a/gargantext/views/api/api.py
+++ b/gargantext/views/api/api.py
@@ -6,7 +6,7 @@ from rest_framework.views       import APIView
 from rest_framework.authentication import SessionAuthentication, BasicAuthentication
 from rest_framework.permissions import IsAuthenticated

-from gargantext.constants       import RESOURCETYPES, NODETYPES, get_resource
+from gargantext.constants       import RESOURCETYPES, get_resource
 from gargantext.models          import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
 from gargantext.util.db         import session, delete, func, bulk_insert
 from gargantext.util.db_cache   import cache, or_

--- a/gargantext/views/api/nodes.py
+++ b/gargantext/views/api/nodes.py

-from gargantext.models          import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
-from gargantext.constants       import NODETYPES, DEFAULT_N_DOCS_HAVING_NGRAM
+from gargantext.models          import Node, NodeType, Ngram, NodeNgram, NodeNodeNgram, NodeNode
+from gargantext.constants       import DEFAULT_N_DOCS_HAVING_NGRAM
 from gargantext.util.db         import session, delete, func, bulk_insert
 from gargantext.util.db_cache   import cache, or_
 from gargantext.util.validation import validate
@@ -13,19 +13,17 @@ from collections import defaultdict

 import csv

-_node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams', 'date']
-_node_default_fields = ['id', 'parent_id', 'name', 'typename']
-_node_available_types = NODETYPES
-
-_hyperdata_available_fields = ['title', 'source', 'abstract', 'statuses',
-                               'language_name', 'language_iso3','language_iso2','language_id',
-                               'publication_date',
-                               'publication_year','publication_month', 'publication_day',
-                               'publication_hour','publication_minute','publication_second']
-#_node_available_formats = ['json', 'csv', 'bibex']
-

 def _query_nodes(request, node_id=None):
+    _node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams', 'date']
+    _node_default_fields = ['id', 'parent_id', 'name', 'typename']
+
+    _hyperdata_available_fields = ['title', 'source', 'abstract', 'statuses',
+                                   'language_name', 'language_iso3','language_iso2','language_id',
+                                   'publication_date',
+                                   'publication_year','publication_month', 'publication_day',
+                                   'publication_hour','publication_minute','publication_second']
+    #_node_available_formats = ['json', 'csv', 'bibex']

    if request.user.id is None:
        raise TypeError("This API request must come from an authenticated user.")
@@ -52,7 +50,7 @@ def _query_nodes(request, node_id=None):
            }},
        # optional filtering parameters
        'types': {'type': list, 'required': False, 'items': {
-            'type': str, 'range': _node_available_types,
+            'type': str, 'range': NodeType.all(),
        }},
        'parent_id': {'type': int, 'required': False},
    }})

--- a/unittests/tests_070_routes.py
+++ b/unittests/tests_070_routes.py
@@ -9,7 +9,7 @@ from django.test import Client
 from gargantext.models import Node

 # to be able to compare in test_073_get_api_one_node()
-from gargantext.constants import NODETYPES
+from gargantext.models import NodeType

 from gargantext.util.db   import session

@@ -80,7 +80,7 @@ class RoutesChecker(TestCase):
        nodename = json_content['name']
        print("\ntesting nodename:", nodename)
        print("\ntesting nodetype:", nodetype)
-        self.assertIn(nodetype, NODETYPES)
+        self.assertIsNotNone(NodeType.get(nodetype))  # unittest has no assertNotNone
        self.assertEqual(nodename, "hello i'm a project")

    # TODO http://localhost:8000/api/nodes?types[]=CORPUS

--- a/unittests/tests_090_toolchain.py
+++ b/unittests/tests_090_toolchain.py
@@ -8,7 +8,7 @@ from django.test import TestCase, Client, RequestFactory
 from gargantext.models import Node, User
 from gargantext.util.db import session

-from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
+from gargantext.constants import RESOURCETYPES, get_resource

 from gargantext.util.toolchain.main import *
 DATA_SAMPLE_DIR = "/srv/gargantext/unittests/mini_test_samples/"