Commit 3fab29e7 authored by Marie FU

reorganization of files, updates for method POST contexts validation along...

reorganization of files, updates for method POST contexts validation along with tests and utility functions and minor update
parent cfc1e7cb
......@@ -12,3 +12,5 @@ htmlcov/
dist/
build/
*.egg-info/
*test_copy/
\ No newline at end of file
import json
from flask import Blueprint, request
from gargantools.utils.utils import check_fileEncoding, check_fileExtension
from gargantools.utils.terms_utils import check_fileContent
from gargantools.utils.utils import check_columnName, check_fileEncoding, check_fileExtension, get_fileContent, get_fileDelimiter
COLUMN_NAMES = ["status", "label"]
bp = Blueprint("terms", __name__, url_prefix="/terms")
......@@ -21,4 +23,26 @@ def termsValidation():
else:
file_encoding = check_fileEncoding(file)
if file_encoding is None:
return "Could not read the file\n", 422
\ No newline at end of file
return "Could not read the file\n", 422
elif get_fileDelimiter(file, file_encoding) != '\t':
return "Error while getting file delimiter.\n Can be the following : \n - File delimiter not found \n - Incorrect file delimiter, should be a tabulation \n - File is malformed\n", 422
else:
file_content = get_fileContent(file, '\t')
if file_content is None:
print('here')
return "Could not read the file\n", 422
else:
header = file_content.keys()
state, notFoundColumn = check_columnName(header, COLUMN_NAMES)
if not state:
return f"Some column names were not found {','.join(notFoundColumn)}\n", 422
else:
state, problems = check_fileContent(file_content)
if state:
return "Correct file\n", 200
else:
if problems is not None:
return 'Incorrect file - File is not compatible with GarganText\n', 422
else:
return 'Unexpected error in file\n', 422
\ No newline at end of file
import csv
import pandas as pd
import petl
from gargantools.utils.utils import check_date, check_unacceptedCharactersQuote, check_unacceptedCharactersTab
......
......@@ -36,7 +36,11 @@ def get_fileName(filename):
return filename.rsplit('.', 1)[:-1][0]
def get_fileDelimiter(file, file_encoding):
    """Guess the column delimiter of a delimited text file.

    The whole stream is read once, decoded with ``file_encoding`` and handed
    to ``csv.Sniffer``. The stream is always rewound to offset 0 afterwards
    so that the caller can read it again.

    Args:
        file: Binary file-like object providing ``read()`` and ``seek()``.
        file_encoding: Name of the codec used to decode the raw bytes.

    Returns:
        The detected delimiter character, or ``None`` when the content
        cannot be decoded or no delimiter can be determined.
    """
    try:
        # Read exactly once: a second read() on an exhausted stream yields
        # b"", which the sniffer always rejects.
        sample = file.read().decode(file_encoding)
        fileDelimiter = csv.Sniffer().sniff(sample).delimiter
    except (csv.Error, UnicodeError, LookupError):
        # csv.Error: no delimiter found; UnicodeError: undecodable bytes;
        # LookupError: unknown codec name.
        fileDelimiter = None
    finally:
        # Rewind even on failure so later readers start from the beginning.
        file.seek(0)
    return fileDelimiter
......
......@@ -335,9 +335,11 @@ paths:
type: string
description: message
example:
- Missing file
- Missing file origin
- Incorrect format or file format not found
- Could not read the file
- Incorrect file - File is not compatible with GarganText
- Some column names were not found ...
- "Error while getting file delimiter. \n Can be the following : \n - File delimiter not found \n - Incorrect file delimiter, should be a tabulation \n - File is malformed\n"
- Unexpected error in file
'404':
description: Not Found
'422':
......
......@@ -19,8 +19,8 @@ def app():
@pytest.fixture
def test_file_copy(request):
tfile_name = request.param
tfile_path = os.path.join(os.path.dirname(__file__), 'test_files', tfile_name)
tfile_name, tfile_dir = request.param
tfile_path = os.path.join(os.path.dirname(__file__), 'test_files', tfile_dir, tfile_name)
if not os.path.exists(tfile_path):
raise FileNotFoundError(f"Test file {tfile_name} not found")
......@@ -39,3 +39,7 @@ def client(app):
@pytest.fixture
def runner(app):
    """Provide the CLI test runner returned by ``app.test_cli_runner()``."""
    cli_runner = app.test_cli_runner()
    return cli_runner
def id_files(param):
    """Build a readable pytest id for a ``(file_name, directory)`` parameter.

    Args:
        param: Two-item iterable ``(file_name, directory)``.

    Returns:
        The string ``"/<directory>/<file_name>"``.
    """
    # Descriptive local names avoid shadowing the ``dir`` builtin.
    file_name, directory = param
    return f"/{directory}/{file_name}"
from io import BytesIO
from itertools import product
import pytest
from gargantools.utils.contexts_utils import check_fileContent
from gargantools.utils.utils import get_fileContent
from tests.conftest import id_files
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_check_fileContent(test_file_copy):
with open(test_file_copy, "r") as f:
f_data = BytesIO(f.read().encode("utf-8"))
......
from io import BytesIO
from itertools import product
from werkzeug.datastructures import FileStorage
import pytest
from tests.conftest import id_files
def test_missingFile(client):
response = client.post('/contexts')
......@@ -10,7 +13,7 @@ def test_missingFile(client):
assert response.data == b"Bad request, missing file\n"
assert response.status_code == 400
@pytest.mark.parametrize("test_file_copy", ["incorrect.csv", "malformed.csv", "incorrectDelimiter.csv", "incorrectHeader.csv", "incorrectExtention"], indirect=True)
@pytest.mark.parametrize("test_file_copy", product(["incorrect.csv", "malformed.csv", "incorrectDelimiter.csv", "incorrectHeader.csv", "incorrectExtension"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_incorrectFile(client, test_file_copy):
with open(test_file_copy, "rb") as f:
f_data = BytesIO(f.read())
......@@ -27,16 +30,16 @@ def test_incorrectFile(client, test_file_copy):
assert response.data == b"File delimiter not found or Incorrect file delimiter, should be a tabulation\n"
elif "incorrectHeader.csv" in test_file_copy:
assert b"Some column names were not found" in response.data
elif "incorrectExtention" in test_file_copy:
elif "incorrectExtension" in test_file_copy:
assert response.data == b"Incorrect file format or file format not found\n"
assert response.status_code == 400
else:
assert False
if "incorrectExtention" not in test_file_copy:
if "incorrectExtension" not in test_file_copy:
assert response.status_code == 422
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_correctFile(client, test_file_copy):
with open(test_file_copy, "rb") as f:
f_data = BytesIO(f.read())
......
from io import BytesIO
from itertools import product
import os
import pytest
from gargantools.utils.contexts_utils import check_fileContent
from gargantools.utils.utils import *
from tests.conftest import id_files
TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files')
......@@ -33,13 +34,13 @@ def test_get_fileName():
assert get_fileName("afilename.csv") == "afilename"
assert get_fileName("afile.name.csv") == "afile.name"
@pytest.mark.parametrize(
    "test_file_copy",
    product(["correct.csv"], ["contexts"]),
    indirect=True,
    ids=id_files,
)
def test_get_fileDelimiter(test_file_copy):
    """Check that a tab is detected as the delimiter of contexts/correct.csv."""
    # Explicit encoding: the open() default depends on the platform locale.
    with open(test_file_copy, "r", encoding="utf-8") as f:
        f_data = BytesIO(f.read().encode("utf-8"))
    assert get_fileDelimiter(f_data, "utf-8") == "\t"
@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
def test_get_fileContent(test_file_copy):
with open(test_file_copy, "r") as f:
f_data = BytesIO(f.read().encode("utf-8"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment