Commit dd730426 authored by delanoe's avatar delanoe

FIX conflicts merge.

parents 0f0c16cf 73f87e93
......@@ -6,14 +6,14 @@ class ISIParser(RISParser):
_begin = 3
_parameters = {
b"ER": {"type": "delimiter"},
b"TI": {"type": "hyperdata", "key": "title", "separator": " "},
b"AU": {"type": "hyperdata", "key": "authors", "separator": ", "},
b"DI": {"type": "hyperdata", "key": "doi"},
b"SO": {"type": "hyperdata", "key": "journal"},
b"PY": {"type": "hyperdata", "key": "publication_year"},
b"PD": {"type": "hyperdata", "key": "publication_month"},
b"LA": {"type": "hyperdata", "key": "language_fullname"},
b"AB": {"type": "hyperdata", "key": "abstract", "separator": " "},
b"WC": {"type": "hyperdata", "key": "fields"},
"ER": {"type": "delimiter"},
"TI": {"type": "hyperdata", "key": "title", "separator": " "},
"AU": {"type": "hyperdata", "key": "authors", "separator": ", "},
"DI": {"type": "hyperdata", "key": "doi"},
"SO": {"type": "hyperdata", "key": "journal"},
"PY": {"type": "hyperdata", "key": "publication_year"},
"PD": {"type": "hyperdata", "key": "publication_month"},
"LA": {"type": "hyperdata", "key": "language_fullname"},
"AB": {"type": "hyperdata", "key": "abstract", "separator": " "},
"WC": {"type": "hyperdata", "key": "fields"},
}
from ._Parser import Parser
from gargantext.util.languages import languages
#from admin.utils import PrintException
class RISParser(Parser):
# def __init__(self, language_cache=None):
#
# #super(Parser, self).__init__()
# #super(Parser, self).__init__()
# self._languages_cache = LanguagesCache() if language_cache is None else language_cache
_begin = 6
_parameters = {
"ER": {"type": "delimiter"},
"ER": {"type": "delimiter"}, # the record delimiter
"TI": {"type": "hyperdata", "key": "title", "separator": " "},
"ST": {"type": "hyperdata", "key": "subtitle", "separator": " "},
"AU": {"type": "hyperdata", "key": "authors", "separator": "\n"},
......@@ -30,36 +20,75 @@ class RISParser(Parser):
}
def parse(self, file):
print("=====> PARSING", file)
hyperdata = {}
last_key = None
last_values = []
current_value = None
for line in file:
# bytes ~~> str
line = line.decode("UTF-8").rstrip('\r\n')
if len(line) > 2 :
# extract the parameter key
if len(line) >= 2 :
# extract the parameter key...
parameter_key = line[:2]
if parameter_key != ' ' and parameter_key != last_key:
# ...and keep the rest for when we know what to do with it
current_value = line[self._begin:]
# it's a new key => therefore the previous key is finished
if parameter_key != last_key:
if last_key in self._parameters:
# translate the parameter key
# translate key
parameter = self._parameters[last_key]
# 1 - we record the previous value array...
if parameter["type"] == "hyperdata":
separator = parameter["separator"] if "separator" in parameter else ""
hyperdata[parameter["key"]] = separator.join(last_values)
#... or even finish the record (rare here, most often after empty line)
elif parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys():
if 'language_iso3' not in hyperdata.keys():
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
last_key = None
hyperdata = {}
last_key = parameter_key
# 2 - new key: also we start a new value array and move on to the next key
last_values = []
try:
last_values.append(line[self._begin:])
except Exception as error:
print(error)
last_key = parameter_key
# 3 - new key or old: in any case we pass contents to
# the value array buffer (=> for the next loop only)
last_values.append(current_value)
current_value = None
# empty line => we need to check if PREVIOUS LINE was record delimiter
else:
if last_key in self._parameters:
if parameter["type"] == "delimiter":
if 'language_fullname' not in hyperdata.keys():
if 'language_iso3' not in hyperdata.keys():
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
last_key = None
hyperdata = {}
# [end of loop per lines]
# if we have any values left on previous line => put them in hd
if last_key in self._parameters:
parameter = self._parameters[last_key]
if parameter["type"] == "hyperdata":
separator = parameter["separator"] if "separator" in parameter else ""
hyperdata[parameter["key"]] = separator.join(last_values)
# if a hyperdata object is left in memory, yield it as well
if hyperdata:
if 'language_fullname' not in hyperdata.keys():
if 'language_iso3' not in hyperdata.keys():
if 'language_iso2' not in hyperdata.keys():
hyperdata['language_iso2'] = 'en'
yield hyperdata
......@@ -52,26 +52,13 @@ TEST_RUNNER = 'unittests.framework.GargTestRunner'
Using a DB session
------------------
To emulate a session the way we usually do it in gargantext, our `unittests.framework` also
provides a session object to the test database via `GargTestRunner.testdb_session`
To work correctly, it needs to be read *inside the test setup.*
The GargTestRunner overrides default settings so that the test database is used in the way we usually do it in gargantext :
**Example**
```
from unittests.framework import GargTestRunner
from gargantext.util.db import session
class MyTestRecipes(TestCase):
def setUp(self):
# -------------------------------------
session = GargTestRunner.testdb_session
# -------------------------------------
new_project = Node(
typename = 'PROJECT',
name = "hello i'm a project",
)
session.add(new_project)
session.commit()
session.query(Nodes).all() # gives all the nodes of the testdb
```
......@@ -123,12 +110,3 @@ class MyTestRecipes(TestCase):
```
*Si vous aimez les aventures de Peter Corser, lisez l'album précédent ["Doors"](https://gogs.iscpif.fr/leclaire/doors)* (Scénario M. Leclaire, Dessins R. Loth) (disponible dans toutes les bonnes librairies)
FIXME
-----
url client get will still give read access to original DB ?
cf. http://stackoverflow.com/questions/19714521
cf. http://stackoverflow.com/questions/11046039
cf. test_073_get_api_one_node
......@@ -4,11 +4,7 @@ A test runner derived from default (DiscoverRunner) but adapted to our custom DB
cf. docs.djangoproject.com/en/1.9/topics/testing/advanced/#using-different-testing-frameworks
cf. gargantext/settings.py => TEST_RUNNER
cf. dbmigrate.py
FIXME url get will still give read access to original DB ?
cf. http://stackoverflow.com/questions/19714521
cf. http://stackoverflow.com/questions/11046039
cf. test_073_get_api_one_node
cf ./session_and_db_remarks.md
"""
# basic elements
......@@ -24,13 +20,18 @@ from django.contrib.auth.models import User
from os import environ
from django import setup
environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
# hack to make our minimal django use the same test DB settings as DiscoverRunner
# (more details: cf ./session_and_db_remarks.md)
DATABASES['default']['NAME'] = DATABASES['default']['TEST']['NAME']
setup() # models can now be imported
from gargantext import models # Base is now filled
from gargantext.util.db import Base # contains metadata.tables
# ------------------------------------------------------------------------------
# things needed to provide a session
from sqlalchemy.orm import sessionmaker, scoped_session
# thanks to our hack, util.db.engine and util.db.session already use the test DB
from gargantext.util.db import engine, session
class GargTestRunner(DiscoverRunner):
......@@ -38,23 +39,21 @@ class GargTestRunner(DiscoverRunner):
We use the default test runner but we just add
our own dbmigrate elements at db creation
=> we let django.test.runner do the test db creation + auto migrations
=> we retrieve the test db name from django.test.runner
=> we create a test engine like in gargantext.db.create_engine but with the test db name
=> we create tables for our models like in dbmigrate with the test engine
1) we let django.test.runner do the test db creation + auto migrations
2) we create tables for our models like in dbmigrate with the test engine
TODO: list of tables to be created are hard coded in self.models
POSSIBLE: definitions of tables to be created should be fully hard coded like the list in self.models
=> then remove django.setup() used to import models and DATABASES renaming to prevent its secondary effects
"""
# we'll also expose a session as GargTestRunner.testdb_session
testdb_session = None
def __init__(self, *args, **kwargs):
# our custom tables to be created (in correct order)
# our custom tablenames to be created (in correct order)
self.models = ['ngrams', 'nodes', 'contacts', 'nodes_nodes', 'nodes_ngrams', 'nodes_nodes_ngrams', 'nodes_ngrams_ngrams', 'nodes_hyperdata']
self.testdb_engine = None
# and execute default django init
# POSSIBLE: hard-code here our custom table declarations
# self.tables = [Table('ngrams', MetaData(bind=None)....)]
# and execute default django unittests init
old_config = super(GargTestRunner, self).__init__(*args, **kwargs)
......@@ -67,28 +66,6 @@ class GargTestRunner(DiscoverRunner):
# default django setup performs base creation + auto migrations
old_config = super(GargTestRunner, self).setup_databases(*args, **kwargs)
# retrieve the testdb_name set by DiscoverRunner
testdb_names = []
for db_infos in get_unique_databases_and_mirrors():
# a key has the form: (IP, port, backend, dbname)
for key in db_infos:
# db_infos[key] has the form (dbname, {'default'})
testdb_names.append(db_infos[key][0])
# /!\ hypothèse d'une database unique /!\
testdb_name = testdb_names[0]
# now we use a copy of our normal db config...
db_params = DATABASES['default']
# ...just changing the name
db_params['NAME'] = testdb_name
# connect to this test db
testdb_url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'.format_map(db_params)
self.testdb_engine = create_engine( testdb_url )
print("TESTDB INIT: opened connection to database **%s**" % db_params['NAME'])
# we retrieve real tables declarations from our loaded Base
sqla_models = (Base.metadata.tables[model_name] for model_name in self.models)
......@@ -103,16 +80,11 @@ class GargTestRunner(DiscoverRunner):
# and now creation of each table in our test db (like dbmigrate)
for model in sqla_models:
try:
model.create(self.testdb_engine)
model.create(engine)
print('TESTDB INIT: created model: `%s`' % model)
except Exception as e:
print('TESTDB INIT ERROR: could not create model: `%s`, %s' % (model, e))
# we also create a session to provide it the way we usually do in garg
# (it's a class based static var to be able to share it with our tests)
GargTestRunner.testdb_session = scoped_session(sessionmaker(bind=self.testdb_engine))
# and let's create a user too otherwise we'll never be able to login
user = User.objects.create_user(username='pcorser', password='peter')
......@@ -126,10 +98,10 @@ class GargTestRunner(DiscoverRunner):
After all tests
"""
# close the session
GargTestRunner.testdb_session.close()
session.close()
# free the connection
self.testdb_engine.dispose()
engine.dispose()
# default django teardown performs destruction of the test base
super(GargTestRunner, self).teardown_databases(old_config, *args, **kwargs)
......
# About the DB settings during the tests
rloth 2016-08-22
#### Correct ordering strategies
Our specific database model causes a problem for the correct order of doing things
- the good practice in creating the test framework is:
1. create a child class of DiscoverRunner
2. define a 'TEST' key in `settings.DATABASES.default`
3. let the DiscoverRunner create the tables in its `__init__()` by calling `super().__init__()` from the child class
(cf. https://docs.djangoproject.com/en/1.10/topics/testing/advanced/)
- but we have tables that are not in the migrations... so creating our full database model (with the `nodes_*` tables) requires either hard-coding their definitions or:
1. do a `django.setup()` first so we can load the SQLAlchemy models (`import gargantext.models`)
2. from there use `util.db.Base` as the table definitions
- Table('nodes', Column('id', Integer()...)
- Table('ngrams' ...)
- etc.
3. Use those definitions to create the tables: `table_definition.create(engine)`
*(cf. dbmigrate.py)*
#### But we see these two ordering strategies are contradictory!
**Explanation**: Doing the `django.setup()` to get the models will load the app modules before using the test database created by `DiscoverRunner.__init__()`
**Consequence**: `util.db.session` will use the native settings for the "real DB" instead of the "test DB".
#### So we need to "cheat" a little bit...
**Solution 1** *(=> will be better in the long run when the tables stop changing)*
We could hard-code the list of tables and columns to create in the test DB. Then there would be no need to load the models to do the migration, and therefore no need to do a `django.setup()` before the `DiscoverRunner.__init__()`.
**Solution 2** *(=> used now)*
We do the `django.setup()` but we modify its `gargantext.settings.DATABASES` on-the-fly with this line:
```
DATABASES['default']['NAME'] = DATABASES['default']['TEST']['NAME']
```
This is a dirty hack because changing settings at runtime makes final values difficult to track, but this way, the setup part and the DiscoverRunner part will share the same DB name (`test_gargandb`)
### To inspect the testdb
Run tests with:
```
./manage.py test unittests/ --keepdb
```
And after the tests, connect to it as gargantua with `psql test_gargandb`
......@@ -11,14 +11,31 @@ from gargantext.models import Node
# to be able to compare in test_073_get_api_one_node()
from gargantext.constants import NODETYPES
# provides GargTestRunner.testdb_session
from unittests.framework import GargTestRunner
from gargantext.util.db import session
class RoutesChecker(TestCase):
@classmethod
def setUpClass(cls):
"""
Will be run *once* for all tests here
NEEDS TO HAVE TestCase.setUpClass()
"""
TestCase.setUpClass()
new_project = Node(
typename = 'PROJECT',
name = "hello i'm a project",
user_id = 1 # todo make sure it's the same user as login
)
session.add(new_project)
session.commit()
cls.a_node_id = new_project.id
print("created a project with id: %i" % new_project.id)
def setUp(self):
"""
Will be run before each test
Will be run before *each* test here
"""
self.client = Client()
......@@ -27,18 +44,7 @@ class RoutesChecker(TestCase):
'/auth/login/',
{'username': 'pcorser', 'password': 'peter'}
)
print(response.status_code)
session = GargTestRunner.testdb_session
new_project = Node(
typename = 'PROJECT',
name = "hello i'm a project",
)
session.add(new_project)
session.commit()
self.a_node_id = new_project.id
print("created a project with id: %i" % new_project.id)
# print(response.status_code) # expected: 302 FOUND
def test_071a_get_front_page(self):
''' get the front page / '''
......@@ -47,7 +53,7 @@ class RoutesChecker(TestCase):
self.assertIn('text/html', front_response.get('Content-Type'))
# on suppose que la page contiendra toujours ce titre
self.assertIn(b'<h1>Gargantext</h1>', front_response.content)
def test_071b_get_inexisting_page(self):
''' get the inexisting page /foo '''
front_response = self.client.get('/foo')
......@@ -64,20 +70,15 @@ class RoutesChecker(TestCase):
# 2) let's try to get things in the json
json_content = api_response.json()
print(json_content)
json_count = json_content['count']
json_nodes = json_content['records']
self.assertEqual(type(json_count), int)
self.assertEqual(type(json_nodes), list)
print("\ntesting nodecount: %i " % json_count)
def test_073_get_api_one_node(self):
''' get "api/nodes/<node_id>" '''
# we first get one node id by re-running this bit from test_072
a_node_id = self.client.get('/api/nodes').json()['records'][0]['id']
one_node_route = '/api/nodes/%i' % a_node_id
one_node_route = '/api/nodes/%i' % RoutesChecker.a_node_id
# print("\ntesting node route: %s" % one_node_route)
api_response = self.client.get(one_node_route)
self.assertTrue(api_response.has_header('Content-Type'))
......@@ -89,6 +90,7 @@ class RoutesChecker(TestCase):
print("\ntesting nodename:", nodename)
print("\ntesting nodetype:", nodetype)
self.assertIn(nodetype, NODETYPES)
self.assertEqual(nodename, "hello i'm a project")
# TODO http://localhost:8000/api/nodes?types[]=CORPUS
......
#!/usr/bin/env python3
"""
STORY TEST SUITE
testing toolchain
"""
import os, sys, logging
from django.test import TestCase, Client, RequestFactory
from gargantext.models import Node, User
from gargantext.util.db import session
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.util.toolchain.main import *
# Directory scanned for sample corpora; _collect_samples_files() looks for
# one sub-directory per source there.
DATA_SAMPLE_DIR = "/srv/gargantext_lib/test_samples/"
# Expected number of documents per sample file, for each source.
# The outer index is the resource type number passed to _run_recipe()
# (slot 0 is a placeholder so that index 1 lines up with resource type 1);
# the inner index is the position of the sample file for that source.
# NOTE(review): the -1 entries look like "count not known yet" placeholders
# (a real document count can never equal -1) — confirm before relying on them.
DATA_SAMPLE_NDOCS = [
None, # 0 - unused placeholder (cf. RESOURCETYPES numbering)
[50,4,50], # 1-europresse
[], # 2-jstor
[81,81], # 3-pubmed
[-1], # 4-scopus
[-1], # 5-web_of_science
[-1], # 6-zotero
[837,1000], # 7-csv
[-1], # 8-istex
[3,10], # 9-scoap
[-1], # 10-repec
]
class ToolChainRecipes(TestCase):
    """
    Story tests for the import toolchain.

    For each source, a corpus is created from every sample file found under
    DATA_SAMPLE_DIR, then parsed and sent through ngram extraction; the number
    of resulting documents is compared with DATA_SAMPLE_NDOCS.
    """

    def setUp(self):
        """Runs before each test: session, logger, client, user, project, samples."""
        #self.session = GargTestRunner.testdb_session
        self.session = session
        self.log = logging.getLogger("SomeTest.testSomething")
        self.client = Client()
        # NOTE(review): this User is neither added to the session nor committed
        # here, so self.user.id may still be None when the project is created
        # — confirm this is intended.
        self.user = User()
        self.project = self._create_project()
        self.source_list = [
            (resource["type"], resource["name"]) for resource in RESOURCETYPES
        ]
        self.source_list.insert(0, (0,"Select a database below"))
        self.sample_files = self._collect_samples_files()

    def tearDown(self):
        """Runs after each test: drop the per-test attributes."""
        #del self.session
        del self.client
        #del self.factory
        del self.source_list
        del self.sample_files
        del self.project

    def _create_project(self):
        """Create, commit and return the PROJECT node used as parent of the corpora."""
        self.project = Node(
            user_id = self.user.id,
            typename = 'PROJECT',
            name = "test1000",
        )
        self.session.add(self.project)
        self.session.commit()
        return self.project

    def __count_node_children__(self, CurrNode, typename=None):
        """
        Count the children of CurrNode.

        @param CurrNode: the node whose children are counted
        @param typename: optional typename filter ('' is passed when None)
        @return: the count (also stored in self.children)
        """
        type_filter = '' if typename is None else typename
        self.children = CurrNode.children(type_filter, order=True).count()
        return self.children

    def __find_node_parent__(self, CurrNode):
        """
        Find the parent node of CurrNode.

        The lookup is done on CurrNode.parent_id. The previous version compared
        Node.id with Node.parent_id (a node that is its own parent) and filtered
        on an undefined `name`, so it raised NameError whenever it was called.

        @param CurrNode: the node whose parent is wanted
        @return: the parent Node or None (also stored in self.parent)
        """
        self.parent = (
            self.session.query(Node)
                        .filter(Node.id == CurrNode.parent_id)
                        .first()
        )
        return self.parent

    def _collect_samples_files(self):
        """
        Map each known source name to the list of its sample file paths.

        Source names are derived from RESOURCETYPES names: text before the
        first "[", lowercased, stripped, spaces replaced by underscores.
        Only sub-directories of DATA_SAMPLE_DIR bearing such a name are kept.

        @return: dict {source_name: [paths]} (also stored in self.sample_files)
        """
        from os.path import join

        self.sample_files = {}
        sources = [r["name"].split("[")[0].lower().strip() for r in RESOURCETYPES]
        sources = [r.replace(" ", "_") for r in sources]
        #self.log.debug(sources)
        # NOTE(review): os.listdir() order is arbitrary, while _run_recipe()
        # pairs files with expected counts by position — consider sorting once
        # DATA_SAMPLE_NDOCS is known to follow the sorted order.
        for format_source in os.listdir(DATA_SAMPLE_DIR):
            #self.log.debug(format_source)
            full_path = join(DATA_SAMPLE_DIR, format_source)
            if os.path.isfile(full_path) or format_source not in sources:
                continue
            self.sample_files[format_source] = [
                join(full_path, samplef) for samplef in os.listdir(full_path)
            ]
        return self.sample_files

    def _create_corpus(self,name, source_type, sample_file):
        """
        Create a CORPUS child of the test project with one attached resource.

        @param name: corpus name
        @param source_type: resource type number (converted with int())
        @param sample_file: path of the sample file attached as resource
        @return: the committed corpus node (also stored in self.corpus)
        """
        self.corpus = self.project.add_child(
            name = name,
            typename = 'CORPUS',
        )
        self.corpus.add_resource(
            type = int(source_type),
            path = sample_file,
        )
        self.session.add(self.corpus)
        self.session.commit()
        return self.corpus

    def _get_corpus(self, name):
        """Return the first CORPUS node with this name from the DB (or None)."""
        corpus = (
            self.session.query(Node)
                        .filter(Node.typename == "CORPUS", Node.name == name)
                        .first()
        )
        return corpus

    def _run_recipe(self, source_type, expected_results):
        """
        Each of the resources input test can follow this common recipe base

        @param source_type: int (cf. constants.py RESOURCETYPES)
        @param expected_results: []int (number of docs for each sample corpora of this source)
        """
        source = get_resource(source_type)
        source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
        self.test_name = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
        self.log.debug("\n" + self.test_name)

        for i,sample_file in enumerate(self.sample_files[source_name]):
            print("... sample_file:", sample_file)
            expected_ndocs = expected_results[i]
            name = "test_"+source_name+str(i)

            # 1) corpus creation, compared with what the DB gives back
            self.log.debug("\t- Checking creation of corpus %s" %name)
            self.corpus = self._create_corpus(name, source_type, sample_file)
            db_corpus = self._get_corpus(name)
            self.assertEqual(self.corpus.name, db_corpus.name)

            self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
            self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])

            # 2) parsing: the number of DOCUMENT children must match
            # (parse is not defined in this module; presumably provided by
            #  the star import from gargantext.util.toolchain.main)
            self.log.debug("\t- Parsing and indexing corpus")
            parse(self.corpus)
            real_ndocs = self.__count_node_children__(self.corpus, "DOCUMENT")
            print('==>\t'+str(source_type)+'\t'+str(i)+'\t'+sample_file+'\t'+str(real_ndocs))
            self.assertEqual(real_ndocs, expected_ndocs)
            status = self.corpus.status()

            # 3) ngram extraction: the corpus status must end up complete
            self.log.debug("\t- Extracting ngrams")
            extract_ngrams(self.corpus)
            # ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
            status = self.corpus.status()
            self.assertTrue(status["complete"])

    def test_000_get_project(self):
        """Request the page of the test project (no assertion on the response)."""
        self.client.get("/projects/%i" %self.project.id)

    def tests_001_europresse(self):
        '''testing Europresse parsing'''
        self._run_recipe(1, DATA_SAMPLE_NDOCS[1])

    # the following tests run the same recipe for resource types 2 to 10

    def tests_002(self):
        self._run_recipe(2, DATA_SAMPLE_NDOCS[2])

    def tests_003(self):
        self._run_recipe(3, DATA_SAMPLE_NDOCS[3])

    def tests_004(self):
        self._run_recipe(4, DATA_SAMPLE_NDOCS[4])

    def tests_005(self):
        self._run_recipe(5, DATA_SAMPLE_NDOCS[5])

    def tests_006(self):
        self._run_recipe(6, DATA_SAMPLE_NDOCS[6])

    def tests_007(self):
        self._run_recipe(7, DATA_SAMPLE_NDOCS[7])

    def tests_008(self):
        self._run_recipe(8, DATA_SAMPLE_NDOCS[8])

    def tests_009(self):
        self._run_recipe(9, DATA_SAMPLE_NDOCS[9])

    def tests_010(self):
        self._run_recipe(10, DATA_SAMPLE_NDOCS[10])
if __name__ == "__main__":
    # unittest is not imported at the top of this module, so it is imported
    # here: without it, unittest.main() below raises NameError.
    import unittest

    logging.basicConfig( stream=sys.stderr )
    # DEBUG must be set on the logger that ToolChainRecipes.setUp() actually
    # uses ("SomeTest.testSomething"); otherwise its debug messages are dropped.
    logging.getLogger( "SomeTest.testSomething" ).setLevel( logging.DEBUG )
    unittest.main()
#!/usr/bin/env python3
from django.test import Client, TestCase

from gargantext.util.db import session
class UserRecipes(TestCase):
    """Skeleton suite for user scenarios; every test is still an empty stub."""

    def setUp(self):
        """Runs before each test: keep a DB session and an HTTP test client."""
        self.session, self.client = session, Client()

    def tearDown(self):
        """Runs after each test: nothing to clean up yet."""

    def test_000_create_user(self):
        """Stub: not implemented yet."""

    def test_001_login(self):
        """Stub: not implemented yet."""

    def test_002_authenticate(self):
        """Stub: not implemented yet."""

    def test_003_unlogin(self):
        """Stub: not implemented yet."""
#!/usr/bin/env python3
from django.test import TestCase
class ProjectsRecipes(TestCase):
    """Route checks for the `/projects/` collection and the `/project/<id>` item."""

    def setUp(self):
        """Runs before each test: DB session, HTTP test client and a project owner."""
        # Function-scope imports: this module only imports TestCase at its top,
        # so Client, session and User would otherwise be undefined names.
        from django.test import Client
        from gargantext.models import User
        from gargantext.util.db import session

        self.session = session
        self.client = Client()
        # NOTE(review): this User is not saved, so self.user.id may be None
        # — TODO confirm which user should own the test projects.
        self.user = User()

    def tearDown(self):
        """Runs after each test: nothing to clean up yet."""
        pass

    def _create_projet(self):
        """Create, commit and return a PROJECT node named "test1"."""
        from gargantext.models import Node

        #resp = self.client.post('/projects/', data={"name":"test"})
        self.project = Node(
            user_id = self.user.id,
            typename = 'PROJECT',
            name = "test1",
        )
        self.session.add(self.project)
        self.session.commit()
        return self.project

    def test_001_get_projects(self):
        '''get every projects'''
        resp = self.client.get('/projects/')
        self.assertEqual(resp.status_code, 200)

    def test_002_delete_projects(self):
        '''delete every projects'''
        resp = self.client.delete('/projects/')
        self.assertEqual(resp.status_code, 204)

    def test_003_put_projects(self):
        '''modify every projects'''
        resp = self.client.put('/projects?name="test"')
        self.assertEqual(resp.status_code, 202)

    def test_004_post_project(self):
        '''create a project'''
        resp = self.client.post('/projects/', data={"name":"test"})
        self.assertEqual(resp.status_code, 201)

    def test_005_get_project(self):
        '''get one project'''
        project = self._create_projet()
        # a "get" test must issue a GET (it used to send DELETE);
        # %i formatting because project.id is an int, not a str
        resp = self.client.get('/project/%i' % project.id)
        self.assertEqual(resp.status_code, 200)

    def test_006_delete_project(self):
        '''delete one project'''
        project = self._create_projet()
        #delete it
        resp = self.client.delete('/project/%i' % project.id)
        self.assertEqual(resp.status_code, 204)

    def test_007_put_project(self):
        '''modify one project'''
        project = self._create_projet()
        resp = self.client.put('/project/%i?name=newname' % project.id)
        self.assertEqual(resp.status_code, 204)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment