reorganization of files, updates for method POST contexts validation along...

reorganization of files, updates for method POST contexts validation along with tests and utility functions and minor update

reorganization of files, updates for method POST contexts validation along...
reorganization of files, updates for method POST contexts validation along with tests and utility functions and minor update
3fab29e7 · Marie FU · cfc1e7cb · 3fab29e7 · 3fab29e7 · 3fab29e7
Commit 3fab29e7 authored Jan 27, 2025 by Marie FU
15 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@ htmlcov/
 dist/
 build/
 *.egg-info/
+
+*test_copy/
\ No newline at end of file
--- a/gargantools/src/terms.py
+++ b/gargantools/src/terms.py
 import json
 from flask import Blueprint, request

-from gargantools.utils.utils import check_fileEncoding, check_fileExtension
+from gargantools.utils.terms_utils import check_fileContent
+from gargantools.utils.utils import check_columnName, check_fileEncoding, check_fileExtension, get_fileContent, get_fileDelimiter

+COLUMN_NAMES = ["status", "label"]

 bp = Blueprint("terms", __name__, url_prefix="/terms")

@@ -21,4 +23,26 @@ def termsValidation():
        else:
            file_encoding = check_fileEncoding(file)
            if file_encoding is None:
-                return "Could not read the file\n", 422
\ No newline at end of file
+                return "Could not read the file\n", 422
+            elif get_fileDelimiter(file, file_encoding) != '\t':
+                return "Error while getting file delimiter.\n Can be the following : \n - File delimiter not found \n - Incorrect file delimiter, should be a tabulation \n - File is malformed\n", 422
+            else:
+                file_content = get_fileContent(file, '\t')
+                if file_content is None:
+                    print('here')
+                    return "Could not read the file\n", 422
+                else:
+                    header = file_content.keys()
+                    state, notFoundColumn = check_columnName(header, COLUMN_NAMES)
+                    if not state:
+                        return f"Some column names were not found {','.join(notFoundColumn)}\n", 422
+                    else:
+                        state, problems = check_fileContent(file_content)
+                        if state:
+                            return "Correct file\n", 200
+                        else:
+                            if problems is not None:
+                                return 'Incorrect file - File is not compatible with GarganText\n', 422
+                            else:
+                                return 'Unexpected error in file\n', 422
+            
\ No newline at end of file
--- a/gargantools/utils/contexts_utils.py
+++ b/gargantools/utils/contexts_utils.py
-import csv
-import pandas as pd
 import petl

 from gargantools.utils.utils import check_date, check_unacceptedCharactersQuote, check_unacceptedCharactersTab

--- a/gargantools/utils/utils.py
+++ b/gargantools/utils/utils.py
@@ -36,7 +36,11 @@ def get_fileName(filename):
    return filename.rsplit('.', 1)[:-1][0]

 def get_fileDelimiter(file, file_encoding):
-    fileDelimiter = csv.Sniffer().sniff(file.read().decode(file_encoding)).delimiter
+    try:
+        fileDelimiter = csv.Sniffer().sniff(file.read().decode(file_encoding)).delimiter
+    except Exception:
+        fileDelimiter = None
+        
    file.seek(0)

    return fileDelimiter

--- a/openapi.yaml
+++ b/openapi.yaml
@@ -335,9 +335,11 @@ paths:
                      type: string
                      description: message
                    example:
-                      - Missing file
-                      - Missing file origin
-                      - Incorrect format or file format not found
+                      - Could not read the file
+                      - Incorrect file - File is not compatible with GarganText
+                      - Some column names were not found ...
+                      - "Error while getting file delimiter.\n Can be the following : \n - File delimiter not found \n - Incorrect file delimiter, should be a tabulation \n - File is malformed\n"
+                      - Unexpected error in file
        '404':
          description: Not Found
        '422':

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -19,8 +19,8 @@ def app():
    
 @pytest.fixture
 def test_file_copy(request):
-    tfile_name = request.param
-    tfile_path = os.path.join(os.path.dirname(__file__), 'test_files', tfile_name)
+    tfile_name, tfile_dir = request.param
+    tfile_path = os.path.join(os.path.dirname(__file__), 'test_files', tfile_dir, tfile_name)

    if not os.path.exists(tfile_path):
        raise FileNotFoundError(f"Test file {tfile_name} not found")
@@ -39,3 +39,7 @@ def client(app):
 @pytest.fixture
 def runner(app):
    return app.test_cli_runner()
+
+def id_files(param):
+    file, dir = param
+    return f"/{dir}/{file}"
--- a/tests/test_contexts_utils.py
+++ b/tests/test_contexts_utils.py
 from io import BytesIO
+from itertools import product
 import pytest

 from gargantools.utils.contexts_utils import check_fileContent
 from gargantools.utils.utils import get_fileContent
+from tests.conftest import id_files


-@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
+@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
 def test_check_fileContent(test_file_copy):
    with open(test_file_copy, "r") as f:
        f_data = BytesIO(f.read().encode("utf-8"))

--- a/tests/test_contexts_validate.py
+++ b/tests/test_contexts_validate.py
 from io import BytesIO
+from itertools import product
 from werkzeug.datastructures import FileStorage

 import pytest

+from tests.conftest import id_files
+

 def test_missingFile(client):
    response = client.post('/contexts')
@@ -10,7 +13,7 @@ def test_missingFile(client):
    assert response.data == b"Bad request, missing file\n"
    assert response.status_code == 400

-@pytest.mark.parametrize("test_file_copy", ["incorrect.csv", "malformed.csv", "incorrectDelimiter.csv", "incorrectHeader.csv", "incorrectExtention"], indirect=True)
+@pytest.mark.parametrize("test_file_copy", product(["incorrect.csv", "malformed.csv", "incorrectDelimiter.csv", "incorrectHeader.csv", "incorrectExtension"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
 def test_incorrectFile(client, test_file_copy):
    with open(test_file_copy, "rb") as f:
        f_data = BytesIO(f.read())
@@ -27,16 +30,16 @@ def test_incorrectFile(client, test_file_copy):
            assert response.data == b"File delimiter not found or Incorrect file delimiter, should be a tabulation\n"
        elif "incorrectHeader.csv" in test_file_copy:
            assert b"Some column names were not found" in response.data
-        elif "incorrectExtention" in test_file_copy:
+        elif "incorrectExtension" in test_file_copy:
            assert response.data == b"Incorrect file format or file format not found\n"
            assert response.status_code == 400
        else:
            assert False

-        if "incorrectExtention" not in test_file_copy:
+        if "incorrectExtension" not in test_file_copy:
            assert response.status_code == 422

-@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
+@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
 def test_correctFile(client, test_file_copy):
    with open(test_file_copy, "rb") as f:
        f_data = BytesIO(f.read())

--- a/tests/test_files/correct.csv
+++ b/tests/test_files/correct.csv
--- a/tests/test_files/incorrect.csv
+++ b/tests/test_files/incorrect.csv
--- a/tests/test_files/incorrectDelimiter.csv
+++ b/tests/test_files/incorrectDelimiter.csv
--- a/tests/test_files/incorrectExtention
+++ b/tests/test_files/incorrectExtention
--- a/tests/test_files/incorrectHeader.csv
+++ b/tests/test_files/incorrectHeader.csv
--- a/tests/test_files/malformed.csv
+++ b/tests/test_files/malformed.csv
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
 from io import BytesIO
+from itertools import product
 import os
 import pytest
-from gargantools.utils.contexts_utils import check_fileContent
 from gargantools.utils.utils import *
+from tests.conftest import id_files


 TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), 'test_files')
@@ -33,13 +34,13 @@ def test_get_fileName():
    assert get_fileName("afilename.csv") == "afilename"
    assert get_fileName("afile.name.csv") == "afile.name"

-@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
+@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
 def test_get_fileDelimiter(test_file_copy):
    with open(test_file_copy, "r") as f:
        f_data = BytesIO(f.read().encode("utf-8"))
        assert get_fileDelimiter(f_data, "utf-8") == "\t"

-@pytest.mark.parametrize("test_file_copy", ["correct.csv"], indirect=True)
+@pytest.mark.parametrize("test_file_copy", product(["correct.csv"], ["contexts"]), indirect=True, ids=lambda param : id_files(param))
 def test_get_fileContent(test_file_copy):
    with open(test_file_copy, "r") as f:
        f_data = BytesIO(f.read().encode("utf-8"))