fix: improve input validation to allow quotes for strategy parameter … · Unstructured-IO/unstructured-api@456bb9b · GitHub
[go: up one dir, main page]

Skip to content

Commit 456bb9b

Browse files
authored
fix: improve input validation to allow quotes for strategy parameter (#457)
# Improve input validation to allow quotes for strategy parameter Updated the strategy parameter to allow `"` or `'` as input wrapped around valid input to reduce 4xx errors. # Testing Invoked CURL and REST requests passing values such as `'fast'`, `"fast"`, and `fast` for parameter values for the strategy. ``` curl -X POST "PATH_TO_API" \ -H 'accept: application/json' \ -H 'Content-Type: multipart/form-data' \ -H 'unstructured-api-key: KEY' \ -F "files=@/path_to_file.pdf" \ -F "strategy='fast'" \ -F "split-pdf-page=True" \ -F "split-pdf-allow-failed=True" \ -F "split-pdf-concurrency-level=15" ``` Added unit tests (passing).
1 parent c52a2d1 commit 456bb9b

File tree

9 files changed

+40
-5
lines changed

9 files changed

+40
-5
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.0.81.dev
2+
3+
* Update `strategy` parameter to allow `'` and `"` as input surrounding the value.
4+
15
## 0.0.80
26

37
* Bump to `unstructured` 0.15.10

prepline_general/api/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
app = FastAPI(
1414
title="Unstructured Pipeline API",
1515
summary="Partition documents with the Unstructured library",
16-
version="0.0.80",
16+
version="0.0.81",
1717
docs_url="/general/docs",
1818
openapi_url="/general/openapi.json",
1919
servers=[

prepline_general/api/general.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ def return_content_type(filename: str):
653653

654654

655655
@router.get("/general/v0/general", include_in_schema=False)
656-
@router.get("/general/v0.0.80/general", include_in_schema=False)
656+
@router.get("/general/v0.0.81/general", include_in_schema=False)
657657
async def handle_invalid_get_request():
658658
raise HTTPException(
659659
status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Only POST requests are supported."
@@ -668,7 +668,7 @@ async def handle_invalid_get_request():
668668
description="Description",
669669
operation_id="partition_parameters",
670670
)
671-
@router.post("/general/v0.0.80/general", include_in_schema=False)
671+
@router.post("/general/v0.0.81/general", include_in_schema=False)
672672
def general_partition(
673673
request: Request,
674674
# cannot use annotated type here because of a bug described here:

prepline_general/api/models/form_params.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def as_form(
156156
description="The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto",
157157
examples=["auto", "hi_res"],
158158
),
159-
BeforeValidator(SmartValueParser[str]().value_or_first_element),
159+
BeforeValidator(SmartValueParser[str]().literal_value_stripped_or_first_element),
160160
] = "auto",
161161
extract_image_block_types: Annotated[
162162
List[str],

prepline_general/api/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,17 @@ def value_or_first_element(self, value: Union[T, list[T]]) -> list[T] | T | None
9494
return [_cast_to_type(elem, container_elems_class) for elem in value]
9595
return _cast_to_type(value, origin_class) # noqa
9696

97+
def literal_value_stripped_or_first_element(self, value: str) -> str | None:
98+
"""Returns the value itself for literal strings and strips quotation characters.
99+
100+
Args:
101+
value (Union[T, List[str]]): value to cast to a type T or return as is
102+
"""
103+
origin_class, container_elems_class = self._get_origin_container_classes()
104+
value = value.replace("'", "")
105+
value = value.replace('"', "")
106+
return _cast_to_type(value, origin_class)
107+
97108
def _get_origin_container_classes(self) -> tuple[type, type | None]:
98109
"""Extracts class (and container class if it's a list) from a type hint
99110

preprocessing-pipeline-family.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
name: general
2-
version: 0.0.80
2+
version: 0.0.81

test_general/__init__.py

Whitespace-only changes.

test_general/api/__init__.py

Whitespace-only changes.

test_general/test_utils.py renamed to test_general/api/test_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,23 @@
3030
def test_smart_value_parser(desired_type: type, value_to_parse: Any, expected_result: Any):
3131
parsed_value = SmartValueParser[desired_type]().value_or_first_element(value_to_parse)
3232
assert expected_result == parsed_value
33+
34+
35+
@pytest.mark.parametrize(
36+
"desired_type, value_to_parse, expected_result",
37+
[
38+
(str, "fast", "fast"),
39+
(str, "'fast'", "fast"),
40+
(str, '"fast"', "fast"),
41+
(str, "!fast", "!fast"),
42+
(str, "fa'st", "fast"),
43+
(str, "fast''''''", "fast"),
44+
],
45+
)
46+
def test_literal_value_stripped_or_first_element(
47+
desired_type: type, value_to_parse: Any, expected_result: Any
48+
):
49+
parsed_value = SmartValueParser[desired_type]().literal_value_stripped_or_first_element(
50+
value_to_parse
51+
)
52+
assert expected_result == parsed_value

0 commit comments

Comments
 (0)
0