Commit 3fab29e7 authored by Marie FU

reorganization of files, updates for method POST contexts validation along...

Reorganization of files; updates to the POST contexts validation method, along with tests and utility functions; minor updates.
parent cfc1e7cb
...@@ -12,3 +12,5 @@ htmlcov/ ...@@ -12,3 +12,5 @@ htmlcov/
dist/ dist/
build/ build/
*.egg-info/ *.egg-info/
*test_copy/
\ No newline at end of file
import json import json
from flask import Blueprint, request from flask import Blueprint, request
from gargantools.utils.utils import check_fileEncoding, check_fileExtension from gargantools.utils.terms_utils import check_fileContent
from gargantools.utils.utils import check_columnName, check_fileEncoding, check_fileExtension, get_fileContent, get_fileDelimiter
COLUMN_NAMES = ["status", "label"]
bp = Blueprint("terms", __name__, url_prefix="/terms") bp = Blueprint("terms", __name__, url_prefix="/terms")
...@@ -21,4 +23,26 @@ def termsValidation(): ...@@ -21,4 +23,26 @@ def termsValidation():
else: else:
file_encoding = check_fileEncoding(file) file_encoding = check_fileEncoding(file)
if file_encoding is None: if file_encoding is None:
return "Could not read the file\n", 422 return "Could not read the file\n", 422
\ No newline at end of file elif get_fileDelimiter(file, file_encoding) != '\t':
return "Error while getting file delimiter.\n Can be the following : \n - File delimiter not found \n - Incorrect file delimiter, should be a tabulation \n - File is malformed\n", 422
else:
file_content = get_fileContent(file, '\t')
if file_content is None:
print('here')
return "Could not read the file\n", 422
else:
header = file_content.keys()
state, notFoundColumn = check_columnName(header, COLUMN_NAMES)
if not state:
return f"Some column names were not found {','.join(notFoundColumn)}\n", 422
else:
state, problems = check_fileContent(file_content)
if state:
return "Correct file\n", 200
else:
if problems is not None:
return 'Incorrect file - File is not compatible with GarganText\n', 422
else:
return 'Unexpected error in file\n', 422
\ No newline at end of file
import csv
import pandas as pd
import petl import petl
from gargantools.utils.utils import check_date, check_unacceptedCharactersQuote, check_unacceptedCharactersTab from gargantools.utils.utils import check_date, check_unacceptedCharactersQuote, check_unacceptedCharactersTab
......
...@@ -36,7 +36,11 @@ def get_fileName(filename): ...@@ -36,7 +36,11 @@ def get_fileName(filename):
return filename.rsplit('.', 1)[:-1][0] return filename.rsplit('.', 1)[:-1][0]
def get_fileDelimiter(file, file_encoding):
    """Guess the column delimiter of an uploaded delimited-text file.

    The whole stream is read, decoded with ``file_encoding`` and handed to
    ``csv.Sniffer``. The stream is always rewound to its start afterwards so
    that later readers see the file from the beginning.

    Args:
        file: Binary file-like object supporting ``read()`` and ``seek()``.
        file_encoding: Name of the encoding used to decode the bytes.

    Returns:
        The detected delimiter as a one-character string, or ``None`` when
        the content could not be read, decoded or sniffed.
    """
    try:
        sample = file.read().decode(file_encoding)
        fileDelimiter = csv.Sniffer().sniff(sample).delimiter
    except Exception:
        # Deliberately best-effort: any failure is reported as "no delimiter"
        # so callers can simply compare the result against the expected one.
        fileDelimiter = None
    finally:
        # Rewind whether or not sniffing succeeded.
        file.seek(0)
    return fileDelimiter
......
...@@ -335,9 +335,11 @@ paths: ...@@ -335,9 +335,11 @@ paths:
type: string type: string
description: message description: message
example: example:
- Missing file - Could not read file
- Missing file origin - Incorrect file - File is not compatible with GarganText
- Incorrect format or file format not found - Some column names were not found ...
- "Error while getting file delimiter. \n Can be the following : \n - File delimiter not found \n - Incorrect file delimiter, should be a tabulation \n - File is malformed\n"
- Unexpected error in file
'404': '404':
description: Not Found description: Not Found
'422': '422':
......
...@@ -19,8 +19,8 @@ def app(): ...@@ -19,8 +19,8 @@ def app():
@pytest.fixture @pytest.fixture
def test_file_copy(request): def test_file_copy(request):
tfile_name = request.param tfile_name, tfile_dir = request.param
tfile_path = os.path.join(os.path.dirname(__file__), 'test_files', tfile_name) tfile_path = os.path.join(os.path.dirname(__file__), 'test_files', tfile_dir, tfile_name)
if not os.path.exists(tfile_path): if not os.path.exists(tfile_path):
raise FileNotFoundError(f"Test file {tfile_name} not found") raise FileNotFoundError(f"Test file {tfile_name} not found")
...@@ -39,3 +39,7 @@ def client(app): ...@@ -39,3 +39,7 @@ def client(app):
@pytest.fixture @pytest.fixture
def runner(app): def runner(app):
return app.test_cli_runner() return app.test_cli_runner()
def id_files(param):
    """Build a readable pytest id for a ``(file name, directory)`` parameter.

    Args:
        param: Two-item iterable ``(file_name, dir_name)``.

    Returns:
        The string ``"/<dir_name>/<file_name>"``.
    """
    # Local names avoid shadowing the builtin ``dir``.
    file_name, dir_name = param
    return f"/{dir_name}/{file_name}"
from io import BytesIO from io import BytesIO
from itertools import product
import pytest import pytest
from gargantools.utils.contexts_utils import check_fileContent from gargantools.utils.contexts_utils import check_fileContent
from gargantools.utils.utils import get_fileContent from gargantools.utils.utils import get_fileContent
from tests.conftest import id_files
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True) @pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_check_fileContent(test_file_copy): def test_check_fileContent(test_file_copy):
with open(test_file_copy, "r") as f: with open(test_file_copy, "r") as f:
f_data = BytesIO(f.read().encode("utf-8")) f_data = BytesIO(f.read().encode("utf-8"))
......
from io import BytesIO from io import BytesIO
from itertools import product
from werkzeug.datastructures import FileStorage from werkzeug.datastructures import FileStorage
import pytest import pytest
from tests.conftest import id_files
def test_missingFile(client): def test_missingFile(client):
response = client.post('/contexts') response = client.post('/contexts')
...@@ -10,7 +13,7 @@ def test_missingFile(client): ...@@ -10,7 +13,7 @@ def test_missingFile(client):
assert response.data == b"Bad request, missing file\n" assert response.data == b"Bad request, missing file\n"
assert response.status_code == 400 assert response.status_code == 400
@pytest.mark.parametrize("test_file_copy", ["incorrect.csv", "malformed.csv", "incorrectDelimiter.csv", "incorrectHeader.csv", "incorrectExtention"], indirect=True) @pytest.mark.parametrize("test_file_copy", product(["incorrect.csv", "malformed.csv", "incorrectDelimiter.csv", "incorrectHeader.csv", "incorrectExtension"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_incorrectFile(client, test_file_copy): def test_incorrectFile(client, test_file_copy):
with open(test_file_copy, "rb") as f: with open(test_file_copy, "rb") as f:
f_data = BytesIO(f.read()) f_data = BytesIO(f.read())
...@@ -27,16 +30,16 @@ def test_incorrectFile(client, test_file_copy): ...@@ -27,16 +30,16 @@ def test_incorrectFile(client, test_file_copy):
assert response.data == b"File delimiter not found or Incorrect file delimiter, should be a tabulation\n" assert response.data == b"File delimiter not found or Incorrect file delimiter, should be a tabulation\n"
elif "incorrectHeader.csv" in test_file_copy: elif "incorrectHeader.csv" in test_file_copy:
assert b"Some column names were not found" in response.data assert b"Some column names were not found" in response.data
elif "incorrectExtention" in test_file_copy: elif "incorrectExtension" in test_file_copy:
assert response.data == b"Incorrect file format or file format not found\n" assert response.data == b"Incorrect file format or file format not found\n"
assert response.status_code == 400 assert response.status_code == 400
else: else:
assert False assert False
if "incorrectExtention" not in test_file_copy: if "incorrectExtension" not in test_file_copy:
assert response.status_code == 422 assert response.status_code == 422
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True) @pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_correctFile(client, test_file_copy): def test_correctFile(client, test_file_copy):
with open(test_file_copy, "rb") as f: with open(test_file_copy, "rb") as f:
f_data = BytesIO(f.read()) f_data = BytesIO(f.read())
......
from io import BytesIO from io import BytesIO
from itertools import product
import os import os
import pytest import pytest
from gargantools.utils.contexts_utils import check_fileContent
from gargantools.utils.utils import * from gargantools.utils.utils import *
from tests.conftest import id_files
TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files') TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files')
...@@ -33,13 +34,13 @@ def test_get_fileName(): ...@@ -33,13 +34,13 @@ def test_get_fileName():
assert get_fileName("afilename.csv") == "afilename" assert get_fileName("afilename.csv") == "afilename"
assert get_fileName("afile.name.csv") == "afile.name" assert get_fileName("afile.name.csv") == "afile.name"
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True) @pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_get_fileDelimiter(test_file_copy): def test_get_fileDelimiter(test_file_copy):
with open(test_file_copy, "r") as f: with open(test_file_copy, "r") as f:
f_data = BytesIO(f.read().encode("utf-8")) f_data = BytesIO(f.read().encode("utf-8"))
assert get_fileDelimiter(f_data, "utf-8") == "\t" assert get_fileDelimiter(f_data, "utf-8") == "\t"
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True) @pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_get_fileContent(test_file_copy): def test_get_fileContent(test_file_copy):
with open(test_file_copy, "r") as f: with open(test_file_copy, "r") as f:
f_data = BytesIO(f.read().encode("utf-8")) f_data = BytesIO(f.read().encode("utf-8"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment