Commit c0d41991 authored by c24b's avatar c24b Committed by Romain Loth

REPEC??? JSON OK

(rloth: merging only the file unittests/tests_090_toolchain.py from this past commit by c24b)
(especially for the excellent logging strategy)
parent f31ff6d7
#!/usr/bin/python3 env
"""
TOOLCHAIN TEST SUITE
STORY TEST SUITE
testing toolchain
"""
#switching to standard testing
from django.test import TestCase
from django.test import Client
#from django.contrib.auth.models import User
#from django.contrib.auth import authenticate
# test Nodes
from gargantext.models import Node
from gargantext.constants import RESOURCETYPES, NODETYPES
import os, sys, logging
from django.test import TestCase, Client, RequestFactory
from gargantext.models import Node, User
from gargantext.util.db import session
from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
# provides GargTestRunner.testdb_session
from unittests.framework import GargTestRunner
import os
#from unittests.framework import GargTestRunner
from gargantext.util.toolchain.main import *
DATA_SAMPLE_DIR = "/srv/gargantext_lib/data_samples/"
class ToolChainRecipes(TestCase):
def setUp(self):
self.session = GargTestRunner.testdb_session
#self.session = GargTestRunner.testdb_session
self.session = session
self.log= logging.getLogger( "SomeTest.testSomething" )
self.client = Client()
# login ---------------------------------------------------
response = self.client.post(
'/auth/login/',
{'username': 'gargantua', 'password': 'gargantua'}
)
# ---------------------------------------------------------
self.user = User()
self.project = self._create_project()
self.source_list = [(resource["type"], resource["name"]) for resource in RESOURCETYPES]
self.source_list.insert(0, (0,"Select a database below"))
#self.files = for d in os.path.join("/home/")
self.sample_files = self._collect_samples_files()
def tearDown(self):
del self.session
#del self.session
del self.client
#del self.factory
del self.source_list
del self.file_list
def list_data_samples(self):
pass
del self.sample_files
del self.project
def _create_project(self):
self.project = Node(
user_id = user.id,
user_id = self.user.id,
typename = 'PROJECT',
name = "test1",
name = "test1000",
)
session.add(self.project)
session.commit()
self.session.add(self.project)
self.session.commit()
return self.project
def create_test(self):
#need a file a name and a sourcetype
pass
def __create_user__(self, name="john", password="lucyinthesky", mail='lennon@thebeatles.com'):
user = User.objects.create_user(name, mail, password)
user.save()
self.user = User.objects.get(name="john")
return self.user
def __find_node__(self, typename, name=None):
'''find a node by typenode and name'''
if name is not None:
self.node = self.session.query(Node).filter(Node.typename == typename, Node.name == name).first()
else:
self.node = self.session.query(Node).filter(Node.typename == typename).first()
def __find_nodes__(self, typename):
'''find nodes by typename'''
self.nodes = self.session.query(Node).filter(Node.typename == typename).all()
def __find_node_children__(self, CurrNode, typename=None):
def __count_node_children__(self, CurrNode, typename=None):
'''find ALL the children of a given Node [optionnal filter TYPENAME] '''
if typename is None:
self.children = CurrNode.children('', order=True).all()
self.children = CurrNode.children('', order=True).count()
else:
self.children = CurrNode.children(typename, order=True).all()
self.children = CurrNode.children(typename, order=True).count()
return self.children
def __find_node_parent__(self, CurrNode):
'''find the parent Node given a CurrNode '''
self.parent = self.session.query(Node).filter(Node.id == Node.parent_id, Node.name == name).first()
def __get_statuses__(self, Node):
'''get the status of the current Node'''
self.statuses = Node.get_status()
def _collect_samples_files(self):
from collections import defaultdict
from os.path import isfile, join
self.sample_files = {}
sources = [ r["name"].split("[")[0].lower().strip() for r in RESOURCETYPES]
sources = [r.replace(" ", "_") for r in sources]
#self.log.debug(sources)
for format_source in os.listdir(DATA_SAMPLE_DIR):
#self.log.debug(format_source)
full_path = join(DATA_SAMPLE_DIR, format_source)
if not os.path.isfile(full_path):
if format_source in sources:
self.sample_files[format_source] = [join(full_path, samplef) for samplef in os.listdir(full_path)]
return self.sample_files
def _create_corpus(self,name, source_type, sample_file):
self.corpus = self.project.add_child(
name = name,
typename = 'CORPUS',
)
self.corpus.add_resource(
type = int(source_type),
path = sample_file,
)
self.session.add(self.corpus)
self.session.commit()
return self.corpus
def _get_corpus(self, name):
corpus = self.session.query(Node).filter(Node.typename == "CORPUS", Node.name == name).first()
return corpus
def test_000_post(self):
self.client.get("/projects/%i" %self.project.id)
def tests_001_europresse(self):
'''testing Europresse parsing'''
#create a project
__name__ = ">> "+ sys._getframe().f_code.co_name +":"
self.log.debug("\n", __name__)
source_type = 1
for i,sample_file in enumerate(self.sample_files["europresse"]):
name = "testEuropress_"+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 4)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -114,26 +129,26 @@ class ToolChainRecipes(TestCase):
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -144,27 +159,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(3)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 81)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -175,27 +190,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(4)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -206,27 +221,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(5)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -237,27 +252,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(6)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -268,27 +283,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(7)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 837)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -299,27 +314,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(8)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -330,24 +345,24 @@ class ToolChainRecipes(TestCase):
source = get_resource(9)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 10)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
......@@ -356,27 +371,34 @@ class ToolChainRecipes(TestCase):
#create a project
source_type = 10
source = get_resource(10)
self.log.debug(source)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
self.log.debug(source_name)
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__)
self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name)
self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name)
#corpus check
self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus")
self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50)
status = self.corpus.status()
self.assertTrue(status["complete"])
print("\t- Extracting ngrams")
self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status()
self.assertTrue(status["complete"])
if __name__ == "__main__":
logging.basicConfig( stream=sys.stderr )
logging.getLogger( "unitests.test_090_toolchain" ).setLevel( logging.DEBUG )
unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment