Commit c0d41991 authored by c24b's avatar c24b Committed by Romain Loth

REPEC??? JSON OK

(rloth: merging only the file unittests/tests_090_toolchain.py from this past commit by c24b)
(especially for the excellent logging strategy)
parent f31ff6d7
#!/usr/bin/python3 env #!/usr/bin/python3 env
""" """
TOOLCHAIN TEST SUITE STORY TEST SUITE
testing toolchain
""" """
#switching to standard testing import os, sys, logging
from django.test import TestCase from django.test import TestCase, Client, RequestFactory
from django.test import Client from gargantext.models import Node, User
from gargantext.util.db import session
#from django.contrib.auth.models import User
#from django.contrib.auth import authenticate
# test Nodes from gargantext.constants import RESOURCETYPES, NODETYPES, get_resource
from gargantext.models import Node
from gargantext.constants import RESOURCETYPES, NODETYPES
# provides GargTestRunner.testdb_session # provides GargTestRunner.testdb_session
from unittests.framework import GargTestRunner #from unittests.framework import GargTestRunner
import os
from gargantext.util.toolchain.main import *
DATA_SAMPLE_DIR = "/srv/gargantext_lib/data_samples/" DATA_SAMPLE_DIR = "/srv/gargantext_lib/data_samples/"
class ToolChainRecipes(TestCase): class ToolChainRecipes(TestCase):
def setUp(self): def setUp(self):
self.session = GargTestRunner.testdb_session #self.session = GargTestRunner.testdb_session
self.session = session
self.log= logging.getLogger( "SomeTest.testSomething" )
self.client = Client() self.client = Client()
self.user = User()
# login --------------------------------------------------- self.project = self._create_project()
response = self.client.post(
'/auth/login/',
{'username': 'gargantua', 'password': 'gargantua'}
)
# ---------------------------------------------------------
self.source_list = [(resource["type"], resource["name"]) for resource in RESOURCETYPES] self.source_list = [(resource["type"], resource["name"]) for resource in RESOURCETYPES]
self.source_list.insert(0, (0,"Select a database below")) self.source_list.insert(0, (0,"Select a database below"))
#self.files = for d in os.path.join("/home/")
self.sample_files = self._collect_samples_files() self.sample_files = self._collect_samples_files()
def tearDown(self): def tearDown(self):
del self.session #del self.session
del self.client del self.client
#del self.factory
del self.source_list del self.source_list
del self.file_list del self.sample_files
def list_data_samples(self): del self.project
pass
def _create_project(self): def _create_project(self):
self.project = Node( self.project = Node(
user_id = user.id, user_id = self.user.id,
typename = 'PROJECT', typename = 'PROJECT',
name = "test1", name = "test1000",
) )
session.add(self.project) self.session.add(self.project)
session.commit() self.session.commit()
return self.project return self.project
def create_test(self):
    """Placeholder for a generic test factory; not implemented yet.

    Per the original note, an implementation would need a file, a name
    and a source type.
    """
    return None
def __create_user__(self, name="john", password="lucyinthesky", mail='lennon@thebeatles.com'):
    """Create a user account and keep it on the test case as ``self.user``.

    Args:
        name: login name passed to ``create_user``.
        password: password passed to ``create_user``.
        mail: e-mail address passed to ``create_user``.

    Returns:
        The user object returned by ``User.objects.create_user``.
    """
    # NOTE(review): assumes ``User.objects`` follows Django's UserManager
    # contract, where create_user() saves the account and returns it.
    # Using that return value replaces the previous re-fetch, which looked
    # the user up with a hard-coded "john" and therefore ignored ``name``.
    self.user = User.objects.create_user(name, mail, password)
    return self.user
def __find_node__(self, typename, name=None): def __count_node_children__(self, CurrNode, typename=None):
'''find a node by typenode and name'''
if name is not None:
self.node = self.session.query(Node).filter(Node.typename == typename, Node.name == name).first()
else:
self.node = self.session.query(Node).filter(Node.typename == typename).first()
def __find_nodes__(self, typename):
    """Collect every Node whose ``typename`` matches and store the list in ``self.nodes``."""
    by_type = self.session.query(Node).filter(Node.typename == typename)
    self.nodes = by_type.all()
def __find_node_children__(self, CurrNode, typename=None):
'''find ALL the children of a given Node [optionnal filter TYPENAME] ''' '''find ALL the children of a given Node [optionnal filter TYPENAME] '''
if typename is None: if typename is None:
self.children = CurrNode.children('', order=True).all() self.children = CurrNode.children('', order=True).count()
else: else:
self.children = CurrNode.children(typename, order=True).all() self.children = CurrNode.children(typename, order=True).count()
return self.children
def __find_node_parent__(self, CurrNode):
    """Find the parent Node of ``CurrNode`` and store it in ``self.parent``.

    Args:
        CurrNode: the node whose ``parent_id`` is looked up.

    Returns:
        The first Node whose ``id`` equals ``CurrNode.parent_id``, or
        whatever ``.first()`` yields when nothing matches.
    """
    # Compare against the *given* node's parent_id. The previous filter
    # compared Node.id with Node.parent_id (column against column) and
    # used an undefined ``name`` variable, which raised NameError.
    self.parent = (
        self.session.query(Node)
        .filter(Node.id == CurrNode.parent_id)
        .first()
    )
    return self.parent
def __get_statuses__(self, Node): def _collect_samples_files(self):
'''get the status of the current Node''' from collections import defaultdict
self.statuses = Node.get_status() from os.path import isfile, join
self.sample_files = {}
sources = [ r["name"].split("[")[0].lower().strip() for r in RESOURCETYPES]
sources = [r.replace(" ", "_") for r in sources]
#self.log.debug(sources)
for format_source in os.listdir(DATA_SAMPLE_DIR):
#self.log.debug(format_source)
full_path = join(DATA_SAMPLE_DIR, format_source)
if not os.path.isfile(full_path):
if format_source in sources:
self.sample_files[format_source] = [join(full_path, samplef) for samplef in os.listdir(full_path)]
return self.sample_files
def _create_corpus(self,name, source_type, sample_file):
    """Add a CORPUS child to the test project, attach one resource, and commit.

    Args:
        name: name given to the new corpus node.
        source_type: resource type; converted with ``int()`` before use.
        sample_file: path handed to ``add_resource``.

    Returns:
        The new corpus node, also stored as ``self.corpus``.
    """
    # Bind self.corpus first so it is set even if a later step fails.
    corpus = self.corpus = self.project.add_child(name=name, typename='CORPUS')
    corpus.add_resource(type=int(source_type), path=sample_file)

    db = self.session
    db.add(corpus)
    db.commit()
    return self.corpus
def _get_corpus(self, name):
    """Return the first CORPUS node with the given name, as yielded by ``.first()``."""
    lookup = self.session.query(Node).filter(
        Node.typename == "CORPUS",
        Node.name == name,
    )
    return lookup.first()
def test_000_post(self):
    """Issue a GET on the test project's page; the response is not checked."""
    project_url = "/projects/%i" % self.project.id
    self.client.get(project_url)
def tests_001_europresse(self):
'''testing Europresse parsing'''
#create a project
__name__ = ">> "+ sys._getframe().f_code.co_name +":"
self.log.debug("\n", __name__)
source_type = 1 source_type = 1
for i,sample_file in enumerate(self.sample_files["europresse"]): for i,sample_file in enumerate(self.sample_files["europresse"]):
name = "testEuropress_"+str(i) name = "testEuropress_"+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 4) self.assertEqual(docs, 4)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -114,26 +129,26 @@ class ToolChainRecipes(TestCase): ...@@ -114,26 +129,26 @@ class ToolChainRecipes(TestCase):
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50) self.assertEqual(docs, 50)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -144,27 +159,27 @@ class ToolChainRecipes(TestCase): ...@@ -144,27 +159,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(3) source = get_resource(3)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 81) self.assertEqual(docs, 81)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -175,27 +190,27 @@ class ToolChainRecipes(TestCase): ...@@ -175,27 +190,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(4) source = get_resource(4)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50) self.assertEqual(docs, 50)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -206,27 +221,27 @@ class ToolChainRecipes(TestCase): ...@@ -206,27 +221,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(5) source = get_resource(5)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50) self.assertEqual(docs, 50)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -237,27 +252,27 @@ class ToolChainRecipes(TestCase): ...@@ -237,27 +252,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(6) source = get_resource(6)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50) self.assertEqual(docs, 50)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -268,27 +283,27 @@ class ToolChainRecipes(TestCase): ...@@ -268,27 +283,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(7) source = get_resource(7)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 837) self.assertEqual(docs, 837)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -299,27 +314,27 @@ class ToolChainRecipes(TestCase): ...@@ -299,27 +314,27 @@ class ToolChainRecipes(TestCase):
source = get_resource(8) source = get_resource(8)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50) self.assertEqual(docs, 50)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -330,24 +345,24 @@ class ToolChainRecipes(TestCase): ...@@ -330,24 +345,24 @@ class ToolChainRecipes(TestCase):
source = get_resource(9) source = get_resource(9)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 10) self.assertEqual(docs, 10)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
...@@ -356,27 +371,34 @@ class ToolChainRecipes(TestCase): ...@@ -356,27 +371,34 @@ class ToolChainRecipes(TestCase):
#create a project #create a project
source_type = 10 source_type = 10
source = get_resource(10) source = get_resource(10)
self.log.debug(source)
source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_") source_name = source["name"].split("[")[0].lower().strip().replace(" ", "_")
self.log.debug(source_name)
__name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":" __name__ = ">> "+ sys._getframe().f_code.co_name +"_"+str(source_name)+":"
print("\n", __name__) self.log.debug("\n", __name__)
for i,sample_file in enumerate(self.sample_files[source_name]): for i,sample_file in enumerate(self.sample_files[source_name]):
name = "test_"+source_name+str(i) name = "test_"+source_name+str(i)
print("\t- Checking creation of corpus %s" %name) self.log.debug("\t- Checking creation of corpus %s" %name)
self.corpus = self._create_corpus(name, source_type, sample_file) self.corpus = self._create_corpus(name, source_type, sample_file)
db_corpus = self._get_corpus(name) db_corpus = self._get_corpus(name)
#corpus check #corpus check
self.assertEqual(self.corpus.name, db_corpus.name) self.assertEqual(self.corpus.name, db_corpus.name)
print("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"]) self.log.debug("\t- Checking creation of resource type '%s' " %get_resource(source_type)["name"])
self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"]) self.assertEqual(self.corpus.resources()[0]["type"], db_corpus.resources()[0]["type"])
print("\t- Parsing and indexing corpus") self.log.debug("\t- Parsing and indexing corpus")
parse(self.corpus) parse(self.corpus)
docs = self.__count_node_children__(self.corpus, "DOCUMENT") docs = self.__count_node_children__(self.corpus, "DOCUMENT")
self.assertEqual(docs, 50) self.assertEqual(docs, 50)
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
print("\t- Extracting ngrams") self.log.debug("\t- Extracting ngrams")
extract_ngrams(self.corpus) extract_ngrams(self.corpus)
ngrams = self.__count_node_children__(self.corpus, "NGRAMS") ngrams = self.__count_node_children__(self.corpus, "NGRAMS")
status = self.corpus.status() status = self.corpus.status()
self.assertTrue(status["complete"]) self.assertTrue(status["complete"])
if __name__ == "__main__":
    # Imported here because the module's import block never brings in
    # unittest; without it unittest.main() raises NameError.
    import unittest

    # Send log records to stderr.
    logging.basicConfig( stream=sys.stderr )
    logging.getLogger( "unitests.test_090_toolchain" ).setLevel( logging.DEBUG )
    # The test case logs through "SomeTest.testSomething" (see setUp), so
    # that logger needs DEBUG too or its debug() calls stay filtered.
    logging.getLogger( "SomeTest.testSomething" ).setLevel( logging.DEBUG )
    unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment