Commit b1403d58 authored by Marie FU's avatar Marie FU

feat: project setup, add method GET for contexts and terms TSV along with test files

parent 6ce1ee4a
.venv/
*.pyc
__pycache__/
instance/
.pytest_cache/
.coverage
htmlcov/
dist/
build/
*.egg-info/
# GarganTools
## About The project
## Virtual environment
Create a virtual environment
```shell
python3 -m venv .venv
```
enter environment
```shell
. .venv/bin/activate
```
exit environment
```shell
deactivate
```
## Install needed packages
for executing web app :
```shell
pip install -r requirement.txt
```
for executing test and coverage :
```shell
pip install -e .
pip install -r requirement_test.txt
```
## Tests and Coverage
Executing tests and coverage
```shell
coverage run -m pytest
```
View coverage report
```shell
coverage report
coverage html
```
## Start Project
```shell
flask --app gargantools.py run
```
import os
from flask import Flask
def create_app(test_config=None):
    """Build and configure the GarganTools Flask application.

    Args:
        test_config: Optional mapping of configuration values. When it is
            given, it is applied instead of the instance ``config.py`` file.

    Returns:
        The configured Flask application, with the ``/`` and ``/hello``
        routes and the ``contexts`` and ``terms`` blueprints registered.
    """
    # create the app; config files are looked up relative to the instance folder
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(SECRET_KEY='dev')

    if test_config is not None:
        # a test config was passed in: apply it
        app.config.from_mapping(test_config)
    else:
        # not testing: load the instance config, if it exists
        app.config.from_pyfile('config.py', silent=True)

    # ensure the instance folder exists; any OSError is deliberately ignored
    try:
        os.makedirs(app.instance_path)
    except OSError:
        pass

    # will later become the homepage
    @app.get('/')
    def homepage():
        return 'Welcome to GarganTools'

    # a simple page that says hello
    @app.get('/hello')
    def hello():
        return 'Hello, World!'

    # the blueprint modules are imported inside the factory
    from . import contexts, terms

    # register in the same order as before: contexts first, then terms
    for module in (contexts, terms):
        app.register_blueprint(module.bp)

    return app
import json
from flask import Blueprint
# Blueprint grouping every route served under the /contexts prefix.
bp = Blueprint("contexts", __name__, url_prefix="/contexts")


@bp.get('')
def tsvTemplate():
    """Return the column template of a documents TSV file as JSON.

    Returns:
        A ``(body, status, headers)`` tuple understood by Flask: the template
        serialized with ``json.dumps(..., indent=3)``, the 200 status code and
        a ``Content-Type: application/json`` header.
    """
    template = {
        "Publication Day": 1,
        "Publication Month": 1,
        "Publication Year": 1,
        "Authors": "Some authors",
        "Title": "A title",
        "Abstract": "An abstract",
        "Source": "Some Source",
    }
    # A bare string would be sent with Flask's default text/html content
    # type; the body is JSON, so it is labelled explicitly. The serialized
    # bytes themselves are unchanged.
    return json.dumps(template, indent=3), 200, {"Content-Type": "application/json"}
import json
from flask import Blueprint
# Blueprint grouping every route served under the /terms prefix.
bp = Blueprint("terms", __name__, url_prefix="/terms")


@bp.get('')
def termsTemplate():
    """Return the column template of a terms TSV file as JSON.

    Returns:
        A ``(body, status, headers)`` tuple understood by Flask: the template
        serialized with ``json.dumps(..., indent=3)``, the 200 status code and
        a ``Content-Type: application/json`` header.
    """
    template = {"status": "MapTerm", "label": "A term"}
    # A bare string would be sent with Flask's default text/html content
    # type; the body is JSON, so it is labelled explicitly. The serialized
    # bytes themselves are unchanged.
    return json.dumps(template, indent=3), 200, {"Content-Type": "application/json"}
# File extensions (lower case, without the dot) accepted by allowed_file.
ALLOWED_EXTENSIONS = {'csv', 'tsv'}


def allowed_file(filename):
    """Tell whether ``filename`` ends with an accepted extension.

    The extension is the text after the last dot, compared case-insensitively
    against ``ALLOWED_EXTENSIONS``.

    Args:
        filename: Name of the file to check.

    Returns:
        ``True`` when the name contains a dot and its extension is accepted,
        ``False`` otherwise.
    """
    # a name without any dot has no extension at all
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[-1]
    return extension.lower() in ALLOWED_EXTENSIONS
\ No newline at end of file
openapi: '3.0.3'
info:
title: GarganTools
version: '1.0.0'
description: Lists all the methods of the GarganTools web application, which converts documents of any format into a format compatible with GarganText.
servers:
- url: http://127.0.0.1:5000
tags:
- name: contexts
description: Conversion from any format to TSV
- name: terms
description: Conversion from any format to terms TSV
- name: api
description: API using tools
paths:
/contexts:
get:
tags:
- contexts
summary: Get the columns that should be in a documents CSV file
description: .
operationId: tsvTemplate
responses:
'200':
description: successful operation
content:
application/json:
schema:
$ref: '#/components/schemas/CSV'
'404':
description: Not Found
post:
tags:
- contexts
summary: Operation on one or multiple files of different origin
description: .
operationId: tsvOperation
parameters:
- in: query
name: from
schema:
type: string
enum:
- Harzing
- RIS
- Element
- MultivacTwitterX
- Europresse
- Prospéro
- PDF
requestBody:
content:
multipart/form-data:
schema:
type: object
required:
- file
properties:
newFileName:
type: string
default: ''
file:
$ref: '#/components/schemas/File'
headerMapping:
$ref: '#/components/schemas/CSVmapping'
responses:
'200':
description: TSV file or ZIP of TSV files
content:
application/octet-stream:
schema:
$ref: '#/components/schemas/File'
'400':
description: Bad Request
'404':
description: Not Found
'422':
description: Unprocessable Content
'500':
description: Unexpected Error
/contexts/pdf:
post:
tags:
- contexts
summary: Operation on one or multiple PDF files
description: .
operationId: anyPDFtoTSV
parameters:
- in: query
name: split
schema:
type: integer
example: 7
default: 7
maximum: 15
requestBody:
content:
multipart/form-data:
schema:
type: object
required:
- file
properties:
newFileName:
type: string
default: ''
file:
$ref: '#/components/schemas/File'
metadata:
type: string
format: binary
responses:
'200':
description: TSV file or ZIP of TSV files
content:
application/octet-stream:
schema:
$ref: '#/components/schemas/File'
'400':
description: Bad Request
'404':
description: Not Found
'422':
description: Unprocessable Content
'500':
description: Unexpected Error
/contexts/pdf/scientific:
post:
tags:
- contexts
summary: Operation on one or multiple PDF files
description: .
operationId: scientificPDFtoTSV
parameters:
- in: query
name: select
schema:
type: array
items:
type: string
requestBody:
content:
multipart/form-data:
schema:
type: object
required:
- file
properties:
newFileName:
type: string
default: ''
file:
$ref: '#/components/schemas/File'
metadata:
type: string
format: binary
responses:
'200':
description: TSV file or ZIP of TSV files
content:
application/octet-stream:
schema:
$ref: '#/components/schemas/File'
'400':
description: Bad Request
'404':
description: Not Found
'422':
description: Unprocessable Content
'500':
description: Unexpected Error
/terms:
get:
tags:
- terms
summary: Get the columns that should be in a terms CSV file
description: .
operationId: termsTemplate
responses:
'200':
description: successful operation
content:
application/json:
schema:
$ref: '#/components/schemas/Terms'
'404':
description: Not Found
post:
tags:
- terms
summary: Operation on one or multiple files concerning terms
description: .
operationId: termsOperation
parameters:
- in: query
name: from
schema:
type: string
enum:
- TXT
- Prospero
- GarganTextGEXF
requestBody:
content:
multipart/form-data:
schema:
type: object
required:
- file
properties:
newFileName:
type: string
default: ''
file:
$ref: '#/components/schemas/File'
headerMapping:
$ref: '#/components/schemas/TermsMapping'
responses:
'200':
description: terms TSV file or ZIP of terms TSV files
content:
application/octet-stream:
schema:
$ref: '#/components/schemas/File'
'400':
description: Bad Request
'404':
description: Not Found
'422':
description: Unprocessable Content
'500':
description: Unexpected Error
/api:
post:
tags:
- api
summary: Operation concerning API
description: .
operationId: apiOperation
parameters:
- in: query
name: language
schema:
type: string
enum:
- en
- fr
- es
- in: query
name: from
schema:
type: string
enum:
- Isidore
requestBody:
content:
multipart/form-data:
schema:
type: object
required:
- file
properties:
newFileName:
type: string
default: ''
file:
$ref: '#/components/schemas/File'
responses:
'200':
description: TSV file
content:
application/octet-stream:
schema:
$ref: '#/components/schemas/File'
'400':
description: Bad Request
'404':
description: Not Found
'500':
description: Unexpected Error
components:
schemas:
File:
type: string
format: binary
CSV:
type: object
properties:
Publication Day:
type: integer
minimum: 1
maximum: 31
example: 1
Publication Month:
type: integer
minimum: 1
maximum: 12
example: 1
Publication Year:
type: integer
minimum: 1
example: 1
Authors:
type: string
example: Some authors
Title:
type: string
example: A title
Abstract:
type: string
example: An abstract
Source:
type: string
example: Some Source
CSVmapping:
type: object
properties:
Publication Day:
type: string
example: my column name
Publication Month:
type: string
example: my column name
Publication Year:
type: string
example: my column name
Authors:
type: string
example: my column name
Title:
type: string
example: my column name
Abstract:
type: string
example: my column name
Source:
type: string
example: my column name
Terms:
type: object
properties:
status:
type: string
pattern: '^(MapTerm|CandidateTerm|StopTerm)$'
example: MapTerm
label:
type: string
example: A term
TermsMapping:
type: object
properties:
status:
type: string
example: my column name
label:
type: string
example: my column name
\ No newline at end of file
[project]
name = "gargantools"
version = "1.0.0"
description = "A web application for converting data from different formats to a GarganText-compatible format"
dependencies = [
"flask",
]
[build-system]
requires = ["flit_core<4"]
build-backend = "flit_core.buildapi"
[tool.pytest.ini_options]
testpaths = ["tests"]
[tool.coverage.run]
branch = true
source = ["gargantools"]
Flask==3.1.0
\ No newline at end of file
pytest==8.3.4
coverage==7.6.10
\ No newline at end of file
import pytest
from gargantools import create_app
@pytest.fixture
def app():
    """Yield a GarganTools application created with ``TESTING`` set to True."""
    test_settings = {'TESTING': True}
    application = create_app(test_settings)
    yield application
@pytest.fixture
def client(app):
    """Return a test client created from the ``app`` fixture."""
    test_client = app.test_client()
    return test_client
@pytest.fixture
def runner(app):
    """Return a CLI test runner created from the ``app`` fixture."""
    cli_runner = app.test_cli_runner()
    return cli_runner
from gargantools import create_app
def test_config():
    """Check ``testing`` is off by default and on when ``TESTING`` is passed."""
    default_app = create_app()
    assert not default_app.testing

    testing_app = create_app({'TESTING': True})
    assert testing_app.testing
def test_homepage(client):
    """Check that GET / answers with status 200."""
    homepage_response = client.get('/')
    status = homepage_response.status_code
    assert status == 200
def test_hello(client):
    """Check that GET /hello answers ``Hello, World!`` with status 200."""
    hello_response = client.get('/hello')
    body = hello_response.data
    assert body == b'Hello, World!'
    assert hello_response.status_code == 200
import json
def test_tsvTemplate(client):
    """Check that GET /contexts returns the documents template with status 200."""
    template = {
        "Publication Day": 1,
        "Publication Month": 1,
        "Publication Year": 1,
        "Authors": "Some authors",
        "Title": "A title",
        "Abstract": "An abstract",
        "Source": "Some Source",
    }
    # the body is expected to be the UTF-8 bytes of the 3-space indented JSON
    expected_response = json.dumps(template, indent=3).encode("utf-8")

    response = client.get('/contexts')

    assert response.data == expected_response
    assert response.status_code == 200
def test_termsTemplate(client):
    """Check that GET /terms returns the terms template with status 200."""
    template = {"status": "MapTerm", "label": "A term"}
    # the body is expected to be the UTF-8 bytes of the 3-space indented JSON
    expected_response = json.dumps(template, indent=3).encode("utf-8")

    response = client.get('/terms')

    assert response.data == expected_response
    assert response.status_code == 200
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment