Skip to content
This repository has been archived by the owner on Feb 22, 2024. It is now read-only.

Commit

Permalink
feat: added ability to request one file as multipart/form data (#167)
Browse files Browse the repository at this point in the history
  • Loading branch information
kravetsmic authored Apr 25, 2023
1 parent 9eded4e commit 0628540
Show file tree
Hide file tree
Showing 19 changed files with 712 additions and 733 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.10.1

* Add ability to request one file as multipart/form-data

# 0.10.0

* Update templates for generated API.
Expand Down
24 changes: 9 additions & 15 deletions test_unstructured_api_tools/api/test_file_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,7 @@ def _json_for_one_file(test_file):
([FILE_IMAGE], P_INPUT_1_AND_2_MULTI, JSON, 200, None),
([FILE_DOCX, FILE_IMAGE], P_INPUT_1_AND_2_MULTI, JSON, 200, None),
([FILE_DOCX, FILE_IMAGE], P_INPUT_1_AND_2_MULTI, MIXED, 200, None),
# json returned though mixed requested (maybe not a bug for 1 file?)
pytest.param([FILE_DOCX], P_INPUT_1_MULTI, MIXED, 200, None, marks=pytest.mark.xfail),
([FILE_DOCX], P_INPUT_1_MULTI, MIXED, 200, None),
# json returned though csv requested
pytest.param(
[FILE_IMAGE], P_INPUT_1_AND_2_MULTI, TEXT_CSV, 200, None, marks=pytest.mark.xfail
Expand Down Expand Up @@ -309,10 +308,7 @@ def test_process_file_2(
"gz_content_type",
[
([FILE_DOCX], JSON, RESPONSE_SCHEMA_ISD, 200, False, None, None),
# endpoint doesn't accept mixed media type for one file
pytest.param(
[FILE_DOCX], MIXED, RESPONSE_SCHEMA_ISD, 200, False, None, None, marks=pytest.mark.xfail
),
([FILE_DOCX], MIXED, RESPONSE_SCHEMA_ISD, 200, False, None, None),
# endpoint fails because media type text/csv should have response type str
pytest.param(
[FILE_DOCX],
Expand All @@ -330,7 +326,6 @@ def test_process_file_2(
[FILE_DOCX], None, RESPONSE_SCHEMA_ISD, 200, False, None, None, marks=pytest.mark.xfail
),
([FILE_DOCX], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
# endpoint doesn't accept mixed media type for one file
pytest.param(
[FILE_DOCX],
MIXED,
Expand All @@ -339,7 +334,6 @@ def test_process_file_2(
False,
None,
None,
marks=pytest.mark.xfail,
),
# endpoint fails because media type text/csv should have response type str
pytest.param(
Expand Down Expand Up @@ -377,7 +371,7 @@ def test_process_file_2(
None,
marks=pytest.mark.xfail,
),
([FILE_DOCX, FILE_IMAGE], None, RESPONSE_SCHEMA_ISD, 200, False, None, None),
([FILE_DOCX, FILE_IMAGE], None, RESPONSE_SCHEMA_ISD, 406, False, None, None),
([FILE_DOCX, FILE_IMAGE], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
([FILE_DOCX, FILE_IMAGE], MIXED, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
# endpoint fails because text/csv is not acceptable for multiple files
Expand All @@ -391,12 +385,12 @@ def test_process_file_2(
None,
marks=pytest.mark.xfail,
),
([FILE_DOCX, FILE_IMAGE], None, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
([FILE_DOCX, FILE_IMAGE], None, RESPONSE_SCHEMA_LABELSTUDIO, 406, False, None, None),
(
[FILE_DOCX, FILE_IMAGE, GZIP_FILE_IMAGE],
None,
RESPONSE_SCHEMA_LABELSTUDIO,
200,
406,
False,
None,
None,
Expand All @@ -405,7 +399,7 @@ def test_process_file_2(
[FILE_DOCX, FILE_IMAGE, GZIP_FILE_DOCX],
None,
RESPONSE_SCHEMA_LABELSTUDIO,
200,
406,
False,
None,
None,
Expand All @@ -414,7 +408,7 @@ def test_process_file_2(
[FILE_DOCX, FILE_IMAGE, GZIP_FILE_IMAGE, GZIP_FILE_DOCX],
None,
RESPONSE_SCHEMA_LABELSTUDIO,
200,
406,
False,
None,
None,
Expand Down Expand Up @@ -629,7 +623,7 @@ def test_process_file_3(
False,
None,
),
([FILE_DOCX], MIXED, RESPONSE_SCHEMA_ISD, P_INPUT_1_SINGLE, 406, None, False, None),
([FILE_DOCX], MIXED, RESPONSE_SCHEMA_ISD, P_INPUT_1_SINGLE, 200, None, False, None),
([], MIXED, RESPONSE_SCHEMA_ISD, P_INPUT_1_SINGLE, 400, None, False, None),
(
[GZIP_FILE_DOCX],
Expand Down Expand Up @@ -914,7 +908,7 @@ def test_process_file_4(
RESPONSE_SCHEMA_ISD,
P_INPUT_1_MULTI,
P_INPUT_2_EMPTY,
406,
200,
False,
None,
None,
Expand Down
6 changes: 3 additions & 3 deletions test_unstructured_api_tools/api/test_file_text_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def test_process_file_text_1(
([FILE_MARKDOWN], [FILE_TXT_1], TEXT_CSV, P_INPUT_2_MULTI, 406, False, None, None),
([], [FILE_TXT_1], JSON, P_INPUT_2_SINGLE, 200, False, None, None),
([FILE_DOCX], [], JSON, P_INPUT_2_SINGLE, 200, False, None, None),
([], [FILE_TXT_1], MIXED, P_INPUT_2_EMPTY, 406, False, None, None),
([], [FILE_TXT_1], MIXED, P_INPUT_2_EMPTY, 200, False, None, None),
(
[GZIP_FILE_DOCX],
[FILE_TXT_1],
Expand Down Expand Up @@ -687,7 +687,7 @@ def test_process_file_text_2(
),
([], [FILE_TXT_1], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
([FILE_DOCX], [], JSON, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
([FILE_DOCX], [], MIXED, RESPONSE_SCHEMA_LABELSTUDIO, 406, False, None, None),
([FILE_DOCX], [], MIXED, RESPONSE_SCHEMA_LABELSTUDIO, 200, False, None, None),
(
[GZIP_FILE_DOCX],
[FILE_TXT_1],
Expand Down Expand Up @@ -1063,7 +1063,7 @@ def test_process_file_text_3(
RESPONSE_SCHEMA_LABELSTUDIO,
P_INPUT_1_EMPTY,
P_INPUT_2_EMPTY,
406,
200,
False,
None,
None,
Expand Down
16 changes: 7 additions & 9 deletions test_unstructured_api_tools/api/test_text_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,7 @@ def test_process_text_2(
[
([FILE_TXT_1], JSON, 200, False, None, None),
([GZIP_FILE_TXT_1], JSON, 200, False, None, None),
# endpoint doesn't accept mixed media type for one file
pytest.param([FILE_TXT_1], MIXED, 200, False, None, None, marks=pytest.mark.xfail),
([FILE_TXT_1], MIXED, 200, False, None, None),
# endpoint fails because media type text/csv should have response type str
pytest.param([FILE_TXT_1], TEXT_CSV, 200, False, None, None, marks=pytest.mark.xfail),
# endpoint fails because media type text/csv should have response type str
Expand Down Expand Up @@ -421,19 +420,18 @@ def test_process_text_2(
None,
marks=pytest.mark.xfail,
),
([FILE_TXT_1, FILE_TXT_2], None, 200, False, None, None),
([FILE_TXT_1, FILE_TXT_2], None, 406, False, None, None),
([FILE_TXT_2], JSON, 200, False, None, None),
([GZIP_FILE_TXT_2], JSON, 200, False, None, None),
# endpoint doesn't accept mixed media type for one file
pytest.param([FILE_TXT_2], MIXED, 200, False, None, None, marks=pytest.mark.xfail),
([FILE_TXT_2], MIXED, 200, False, None, None),
# endpoint fails because media type text/csv should have response type str
pytest.param([FILE_TXT_2], TEXT_CSV, 200, False, None, None, marks=pytest.mark.xfail),
# endpoint fails because media type text/csv should have response type str
# because None response type has default text/csv value
pytest.param([FILE_TXT_2], None, 200, False, None, None, marks=pytest.mark.xfail),
([FILE_TXT_2, FILE_MARKDOWN], None, 200, True, None, None),
([FILE_TXT_2, FILE_TXT_1], None, 200, False, FILENAME_FORMATS[FILE_TXT_1], None),
([FILE_TXT_2, FILE_MARKDOWN], None, 400, False, FILENAME_FORMATS[FILE_TXT_1], None),
([FILE_TXT_2, FILE_MARKDOWN], None, 406, True, None, None),
([FILE_TXT_2, FILE_TXT_1], None, 406, False, FILENAME_FORMATS[FILE_TXT_1], None),
([FILE_TXT_2, FILE_MARKDOWN], None, 406, False, FILENAME_FORMATS[FILE_TXT_1], None),
([], None, 400, False, None, None),
([GZIP_FILE_TXT_1], JSON, 200, False, None, FILENAME_FORMATS[FILE_TXT_1]),
],
Expand Down Expand Up @@ -513,7 +511,7 @@ def test_process_text_3(
None,
),
([FILE_TXT_1, FILE_TXT_2], TEXT_CSV, RESPONSE_SCHEMA_ISD, 406, False, None, None),
([FILE_TXT_1], MIXED, RESPONSE_SCHEMA_ISD, 406, False, None, None),
([FILE_TXT_1], MIXED, RESPONSE_SCHEMA_ISD, 200, False, None, None),
([], JSON, RESPONSE_SCHEMA_ISD, 400, False, None, None),
(
[GZIP_FILE_TXT_1],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,55 +181,45 @@ def pipeline_1(
"multipart/mixed",
"application/json",
]:
return PlainTextResponse(
content=(
raise HTTPException(
detail=(
f"Conflict in media type {content_type}"
' with response type "multipart/mixed".\n'
),
status_code=status.HTTP_406_NOT_ACCEPTABLE,
)

def response_generator(is_multipart):
for file in files:
file_content_type = get_validated_mimetype(file)

_file = file.file

response = pipeline_api(
_file,
m_input2=input2,
filename=file.filename,
file_content_type=file_content_type,
)
if is_multipart:
if type(response) not in [str, bytes]:
response = json.dumps(response)
yield response

if content_type == "multipart/mixed":
return MultipartMixedResponse(
response_generator(is_multipart=True),
)
else:
return response_generator(is_multipart=False)
else:
file = files[0]
_file = file.file
def response_generator(is_multipart):
for file in files:
file_content_type = get_validated_mimetype(file)

file_content_type = get_validated_mimetype(file)
_file = file.file

response = pipeline_api(
_file,
m_input2=input2,
filename=file.filename,
file_content_type=file_content_type,
)
response = pipeline_api(
_file,
m_input2=input2,
filename=file.filename,
file_content_type=file_content_type,
)

return response
if is_multipart:
if type(response) not in [str, bytes]:
response = json.dumps(response)
yield response

if content_type == "multipart/mixed":
return MultipartMixedResponse(
response_generator(is_multipart=True),
)
else:
return (
list(response_generator(is_multipart=False))[0]
if len(files) == 1
else response_generator(is_multipart=False)
)
else:
return PlainTextResponse(
content='Request parameter "files" is required.\n',
raise HTTPException(
detail='Request parameter "files" is required.\n',
status_code=status.HTTP_400_BAD_REQUEST,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,49 +168,42 @@ def pipeline_1(
"multipart/mixed",
"application/json",
]:
return PlainTextResponse(
content=(
raise HTTPException(
detail=(
f"Conflict in media type {content_type}"
' with response type "multipart/mixed".\n'
),
status_code=status.HTTP_406_NOT_ACCEPTABLE,
)

def response_generator(is_multipart):
for file in files:
get_validated_mimetype(file)
def response_generator(is_multipart):
for file in files:
get_validated_mimetype(file)

_file = file.file
_file = file.file

response = pipeline_api(
_file,
)
if is_multipart:
if type(response) not in [str, bytes]:
response = json.dumps(response)
yield response

if content_type == "multipart/mixed":
return MultipartMixedResponse(
response_generator(is_multipart=True),
response = pipeline_api(
_file,
)
else:
return response_generator(is_multipart=False)
else:
file = files[0]
_file = file.file

get_validated_mimetype(file)
if is_multipart:
if type(response) not in [str, bytes]:
response = json.dumps(response)
yield response

response = pipeline_api(
_file,
if content_type == "multipart/mixed":
return MultipartMixedResponse(
response_generator(is_multipart=True),
)
else:
return (
list(response_generator(is_multipart=False))[0]
if len(files) == 1
else response_generator(is_multipart=False)
)

return response

else:
return PlainTextResponse(
content='Request parameter "files" is required.\n',
raise HTTPException(
detail='Request parameter "files" is required.\n',
status_code=status.HTTP_400_BAD_REQUEST,
)

Expand Down
Loading

0 comments on commit 0628540

Please sign in to comment.