Commit 5b150ca3 authored by sim's avatar sim

WIP

parent 226dc26e
"""Put resourcetypes in database
Revision ID: 0b9b69af66c3
Revises: bedce47c9e34
Create Date: 2017-07-18 16:54:07.581421
"""
from alembic import op
import sqlalchemy as sa
import gargantext
# revision identifiers, used by Alembic.
revision = '0b9b69af66c3'
down_revision = 'bedce47c9e34'
branch_labels = None
depends_on = None
# Resource types seeded by this migration: each dict is used as the
# ``hyperdata`` of one row inserted by ``upgrade``.
RESOURCETYPES = [
    {
        "name": "EUROPRESSE",
        "label": "Europresse",
        "parser": "EuropresseParser",
        "crawler": None,
    },
    {
        "name": "JSTOR",
        "label": "Jstor [RIS]",
        "parser": "RISParser",
        "crawler": None,
    },
    {
        "name": "PUBMED",
        "label": "Pubmed [XML]",
        "parser": "PubmedParser",
        "crawler": "PubmedCrawler",
    },
    {
        "name": "SCOPUS",
        "label": "Scopus [RIS]",
        "parser": "RISParser",
        "crawler": None,
    },
    {
        "name": "WOS",
        "label": "Web of Science [ISI]",
        "parser": "ISIParser",
        "crawler": None,
    },
    {
        "name": "ZOTERO",
        "label": "Zotero [RIS]",
        "parser": "RISParser",
        "crawler": None,
    },
    {
        "name": "CSV",
        "label": "CSV",
        "parser": "CSVParser",
        "crawler": None,
    },
    {
        "name": "ISTEX",
        "label": "ISTex",
        "parser": "ISTexParser",
        "crawler": None,
    },
    {
        "name": "SCOAP",
        "label": "SCOAP [API/XML]",
        "parser": "CernParser",
        "crawler": "CernCrawler",
    },
    {
        "name": "REPEC",
        "label": "REPEC [MULTIVAC API]",
        "parser": "MultivacParser",
        "crawler": "MultivacCrawler",
    },
    {
        "name": "HAL",
        "label": "HAL [API]",
        "parser": "HalParser",
        "crawler": "HalCrawler",
    },
    {
        "name": "ISIDORE",
        # The backslash is escaped explicitly: a bare "\ " is an invalid
        # escape sequence and triggers a warning on recent Python versions.
        # The resulting string is unchanged.
        "label": "ISIDORE [SPARQLE API /!\\ BETA]",
        "parser": "IsidoreParser",
        "crawler": "IsidoreCrawler",
    },
]
def upgrade():
    """Insert one row per entry of ``RESOURCETYPES`` through ``op.bulk_insert``.

    Each row uses ``gargantua`` as ``parent_id`` and the resource-type dict
    as ``hyperdata``.
    """
    # NOTE(review): ``ResourceTypeNode`` and ``gargantua`` are neither defined
    # nor imported in the visible part of this migration -- confirm where they
    # are expected to come from before running it.
    op.bulk_insert(
        ResourceTypeNode,
        [
            dict(parent_id=gargantua, hyperdata=resource_type)
            for resource_type in RESOURCETYPES
        ],
    )
def downgrade():
    """Revert this migration; currently a no-op.

    Nothing is undone here, so rows inserted by ``upgrade`` are left in place.
    """
    return None
......@@ -30,7 +30,7 @@ from .settings import BASE_DIR
# XXX Originally defined here, imported here for backward-compatibility,
# should be removed later.
from .models.nodes_constants import NODETYPES, LISTTYPES, INDEXED_HYPERDATA, \
from .models.nodes_constants import LISTTYPES, INDEXED_HYPERDATA, \
RESOURCETYPES, get_resource, get_resource_by_name, \
load_parser, load_crawler
......
......@@ -6,10 +6,10 @@ from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index,
Integer, Float, String, DateTime, JSONB, \
MutableList, MutableDict
from .users import User
from .nodes_constants import NODETYPES, LISTTYPES, INDEXED_HYPERDATA
from .nodes_constants import LISTTYPES, INDEXED_HYPERDATA
__all__ = ['NODETYPES', 'LISTTYPES', 'INDEXED_HYPERDATA',
'Node', 'NodeNode', 'CorpusNode']
__all__ = ['LISTTYPES', 'INDEXED_HYPERDATA',
'Node', 'NodeType', 'NodeNode', 'CorpusNode']
class NodeType(TypeDecorator):
......@@ -19,11 +19,49 @@ class NodeType(TypeDecorator):
"""
impl = Integer
def process_bind_param(self, typename, dialect):
return NODETYPES.index(typename)
def process_result_value(self, typeindex, dialect):
return NODETYPES[typeindex]
# Integer type id -> node type name.  Entries are grouped by theme, not
# sorted by id; the insertion order below is kept as-is.
NODETYPES = {
    # Documents hierarchy
    1: 'USER',
    2: 'PROJECT',
    3: 'CORPUS',
    4: 'DOCUMENT',
    19: 'RESOURCE',
    20: 'RESOURCETYPE',
    # Lists
    5: 'STOPLIST',
    6: 'GROUPLIST',
    7: 'MAINLIST',
    8: 'MAPLIST',
    9: 'COOCCURRENCES',
    # Scores
    10: 'OCCURRENCES',
    11: 'SPECCLUSION',
    18: 'GENCLUSION',
    12: 'CVALUE',
    13: 'TFIDF-CORPUS',
    14: 'TFIDF-GLOBAL',
    # more scores (sorry!)
    16: 'TIRANK-LOCAL',
    17: 'TIRANK-GLOBAL',
    # Docs subset
    15: 'FAVORITES',
}
# Reverse lookup: node type name -> integer type id.
NODETYPES_BY_NAME = {name: type_id for type_id, name in NODETYPES.items()}
def process_bind_param(self, typename, dialect=None):
    """Translate a node type name into its integer id.

    Looks ``typename`` up in ``NODETYPES_BY_NAME`` and returns ``None`` when
    it is not a known name.  ``dialect`` is accepted but not used.
    """
    type_id = self.NODETYPES_BY_NAME.get(typename)
    return type_id
def process_result_value(self, typeindex, dialect=None):
    """Translate an integer type id into its node type name.

    Looks ``typeindex`` up in ``NODETYPES`` and returns ``None`` when the id
    is unknown.  ``dialect`` is accepted but not used.
    """
    type_name = self.NODETYPES.get(typeindex)
    return type_name
@classmethod
def all(cls):
    """Return every known node type name (the values view of ``NODETYPES``)."""
    type_names = cls.NODETYPES.values()
    return type_names
@classmethod
def get(cls, x):
    """Convert between a node type id and its name, in either direction.

    An ``int`` argument is treated as a type id and converted to its name;
    anything else is treated as a type name and converted to its id.
    """
    # The converters are instance methods; the class itself is passed in
    # place of an instance.
    if isinstance(x, int):
        return cls.process_result_value(cls, x)
    return cls.process_bind_param(cls, x)
class Node(Base):
......@@ -244,6 +282,15 @@ class CorpusNode(Node):
))
class ResourceTypeNode(Node):
    """``Node`` subclass whose polymorphic identity is ``'RESOURCETYPE'``."""

    __mapper_args__ = dict(polymorphic_identity='RESOURCETYPE')

    def test(self):
        """Placeholder method: does nothing and returns ``None``."""
        return None
class NodeNode(Base):
__tablename__ = 'nodes_nodes'
__table_args__ = (
......@@ -272,7 +319,7 @@ class NodeNode(Base):
_ALREADY_IMPLEMENTED_NODE_TYPES = \
set(cls.__mapper_args__.get('polymorphic_identity') for cls in Node.__subclasses__())
for nodetype in NODETYPES:
for nodetype in NodeType.all():
if nodetype and nodetype not in _ALREADY_IMPLEMENTED_NODE_TYPES:
# Convert nodetype to a CamelCase class name, assuming it's possible...
class_name = ''.join(nodetype.title().split("-")) + 'Node'
......
......@@ -30,38 +30,6 @@ from ..util.dates import datetime, convert_to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
NODETYPES = [
# TODO separate id not array index, read by models.node
None, # 0
# documents hierarchy
'USER', # 1
'PROJECT', # 2
#RESOURCE should be here but last
'CORPUS', # 3
'DOCUMENT', # 4
# lists
'STOPLIST', # 5
'GROUPLIST', # 6
'MAINLIST', # 7
'MAPLIST', # 8
'COOCCURRENCES', # 9
# scores
'OCCURRENCES', # 10
'SPECCLUSION', # 11
'CVALUE', # 12
'TFIDF-CORPUS', # 13
'TFIDF-GLOBAL', # 14
# docs subset
'FAVORITES', # 15
# more scores (sorry!)
'TIRANK-LOCAL', # 16
'TIRANK-GLOBAL', # 17
'GENCLUSION', # 18
'RESOURCE', # 19
]
LISTTYPES = {
'DOCUMENT' : WeightedList,
'GROUPLIST' : Translations, # todo remove "LIST" from name
......
......@@ -7,8 +7,9 @@ from sqlalchemy import exc
from gargantext.util.lists import WeightedMatrix
from gargantext.util.db import get_engine
from gargantext.util.db_cache import cache
from gargantext.constants import DEFAULT_COOC_THRESHOLD, NODETYPES
from gargantext.constants import DEFAULT_COOC_THRESHOLD
from gargantext.constants import INDEXED_HYPERDATA
from gargantext.models import NodeType
def compute_coocs( corpus,
overwrite_id = None,
......@@ -101,7 +102,7 @@ def compute_coocs( corpus,
-- ==
-- GROUP BY ngA, ngB
)
""".format( nodetype_id = NODETYPES.index('DOCUMENT')
""".format( nodetype_id = NodeType.get('DOCUMENT')
, corpus_id=corpus.id
)
......
......@@ -6,7 +6,7 @@ from rest_framework.views import APIView
from rest_framework.authentication import SessionAuthentication, BasicAuthentication
from rest_framework.permissions import IsAuthenticated
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.constants import RESOURCETYPES, get_resource
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_
......
from gargantext.models import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import NODETYPES, DEFAULT_N_DOCS_HAVING_NGRAM
from gargantext.models import Node, NodeType, Ngram, NodeNgram, NodeNodeNgram, NodeNode
from gargantext.constants import DEFAULT_N_DOCS_HAVING_NGRAM
from gargantext.util.db import session, delete, func, bulk_insert
from gargantext.util.db_cache import cache, or_
from gargantext.util.validation import validate
......@@ -13,19 +13,17 @@ from collections import defaultdict
import csv
_node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams', 'date']
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_node_available_types = NODETYPES
_hyperdata_available_fields = ['title', 'source', 'abstract', 'statuses',
'language_name', 'language_iso3','language_iso2','language_id',
'publication_date',
'publication_year','publication_month', 'publication_day',
'publication_hour','publication_minute','publication_second']
#_node_available_formats = ['json', 'csv', 'bibex']
def _query_nodes(request, node_id=None):
_node_available_fields = ['id', 'parent_id', 'name', 'typename', 'hyperdata', 'ngrams', 'date']
_node_default_fields = ['id', 'parent_id', 'name', 'typename']
_hyperdata_available_fields = ['title', 'source', 'abstract', 'statuses',
'language_name', 'language_iso3','language_iso2','language_id',
'publication_date',
'publication_year','publication_month', 'publication_day',
'publication_hour','publication_minute','publication_second']
#_node_available_formats = ['json', 'csv', 'bibex']
if request.user.id is None:
raise TypeError("This API request must come from an authenticated user.")
......@@ -52,7 +50,7 @@ def _query_nodes(request, node_id=None):
}},
# optional filtering parameters
'types': {'type': list, 'required': False, 'items': {
'type': str, 'range': _node_available_types,
'type': str, 'range': NodeType.all(),
}},
'parent_id': {'type': int, 'required': False},
}})
......
......@@ -9,7 +9,7 @@ from django.test import Client
from gargantext.models import Node
# to be able to compare in test_073_get_api_one_node()
from gargantext.constants import NODETYPES
from gargantext.models import NodeType
from gargantext.util.db import session
......@@ -80,7 +80,7 @@ class RoutesChecker(TestCase):
nodename = json_content['name']
print("\ntesting nodename:", nodename)
print("\ntesting nodetype:", nodetype)
self.assertIn(nodetype, NODETYPES)
self.assertNotNone(NodeType.get(nodetype))
self.assertEqual(nodename, "hello i'm a project")
# TODO http://localhost:8000/api/nodes?types[]=CORPUS
......
......@@ -8,7 +8,7 @@ from django.test import TestCase, Client, RequestFactory
from gargantext.models import Node, User
from gargantext.util.db import session
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.constants import RESOURCETYPES, get_resource
from gargantext.util.toolchain.main import *
DATA_SAMPLE_DIR = "/srv/gargantext/unittests/mini_test_samples/"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment