docs(documentai): Update `handle_response` for OCR/Form Parser 2.0 GA Launch · davidxia/python-docs-samples@33b341e · GitHub
[go: up one dir, main page]

Skip to content

Commit 33b341e

Browse files
authored
docs(documentai): Update handle_response for OCR/Form Parser 2.0 GA Launch (GoogleCloudPlatform#10445)
- Update client library to `2.17`, which is required for the sample to work
- Add `processOptions` to OCR
- Add printing of Symbols and Text Styles (not currently populated)
- Reorder Blocks/Paragraphs to indicate parent elements first
- Add generic entity extraction to the Form Parser sample
- Remove Document Quality Processor samples (deprecated in favor of Doc OCR)
1 parent 535baf8 commit 33b341e

File tree

3 files changed

+92
-73
lines changed

3 files changed

+92
-73
lines changed

documentai/snippets/handle_response_sample.py

Lines changed: 84 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,12 @@
1515

1616
# [START documentai_process_ocr_document]
1717
# [START documentai_process_form_document]
18-
# [START documentai_process_quality_document]
1918
# [START documentai_process_specialized_document]
2019
# [START documentai_process_splitter_document]
21-
from typing import Sequence
20+
from typing import Optional, Sequence
2221

2322
from google.api_core.client_options import ClientOptions
24-
from google.cloud import documentai # type: ignore
23+
from google.cloud import documentai
2524

2625
# TODO(developer): Uncomment these variables before running the sample.
2726
# project_id = "YOUR_PROJECT_ID"
@@ -34,7 +33,6 @@
3433

3534
# [END documentai_process_ocr_document]
3635
# [END documentai_process_form_document]
37-
# [END documentai_process_quality_document]
3836
# [END documentai_process_specialized_document]
3937
# [END documentai_process_splitter_document]
4038

@@ -48,9 +46,25 @@ def process_document_ocr_sample(
4846
file_path: str,
4947
mime_type: str,
5048
) -> None:
49+
# Optional: Additional configurations for Document OCR Processor.
50+
# For more information: https://cloud.google.com/document-ai/docs/document-ocr
51+
process_options = documentai.ProcessOptions(
52+
ocr_config=documentai.OcrConfig(
53+
compute_style_info=True,
54+
enable_native_pdf_parsing=True,
55+
enable_image_quality_scores=True,
56+
enable_symbol=True,
57+
)
58+
)
5159
# Online processing request to Document AI
5260
document = process_document(
53-
project_id, location, processor_id, processor_version, file_path, mime_type
61+
project_id,
62+
location,
63+
processor_id,
64+
processor_version,
65+
file_path,
66+
mime_type,
67+
process_options=process_options,
5468
)
5569

5670
text = document.text
@@ -61,15 +75,21 @@ def process_document_ocr_sample(
6175
print(f"Page {page.page_number}:")
6276
print_page_dimensions(page.dimension)
6377
print_detected_langauges(page.detected_languages)
64-
print_paragraphs(page.paragraphs, text)
78+
6579
print_blocks(page.blocks, text)
80+
print_paragraphs(page.paragraphs, text)
6681
print_lines(page.lines, text)
6782
print_tokens(page.tokens, text)
6883

69-
# Currently supported in version `pretrained-ocr-v1.1-2022-09-12`
84+
if page.symbols:
85+
print_symbols(page.symbols, text)
86+
7087
if page.image_quality_scores:
7188
print_image_quality_scores(page.image_quality_scores)
7289

90+
if document.text_styles:
91+
print_styles(document.text_styles, text)
92+
7393

7494
def print_page_dimensions(dimension: documentai.Document.Page.Dimension) -> None:
7595
print(f" Width: {str(dimension.width)}")
@@ -81,8 +101,15 @@ def print_detected_langauges(
81101
) -> None:
82102
print(" Detected languages:")
83103
for lang in detected_languages:
84-
code = lang.language_code
85-
print(f" {code} ({lang.confidence:.1%} confidence)")
104+
print(f" {lang.language_code} ({lang.confidence:.1%} confidence)")
105+
106+
107+
def print_blocks(blocks: Sequence[documentai.Document.Page.Block], text: str) -> None:
    """Print how many blocks a page has and the text of its first and last block.

    Args:
        blocks: Blocks to summarize; the OCR sample passes `page.blocks`.
        text: Full document text that each block layout's text anchor
            indexes into.
    """
    print(f" {len(blocks)} blocks detected:")
    if not blocks:
        # The OCR sample calls this for every page without checking for
        # content, so a page with no blocks must not hit blocks[0] below.
        return

    first_block_text = layout_to_text(blocks[0].layout, text)
    print(f" First text block: {repr(first_block_text)}")
    last_block_text = layout_to_text(blocks[-1].layout, text)
    print(f" Last text block: {repr(last_block_text)}")
86113

87114

88115
def print_paragraphs(
@@ -95,14 +122,6 @@ def print_paragraphs(
95122
print(f" Last paragraph text: {repr(last_paragraph_text)}")
96123

97124

98-
def print_blocks(blocks: Sequence[documentai.Document.Page.Block], text: str) -> None:
99-
print(f" {len(blocks)} blocks detected:")
100-
first_block_text = layout_to_text(blocks[0].layout, text)
101-
print(f" First text block: {repr(first_block_text)}")
102-
last_block_text = layout_to_text(blocks[-1].layout, text)
103-
print(f" Last text block: {repr(last_block_text)}")
104-
105-
106125
def print_lines(lines: Sequence[documentai.Document.Page.Line], text: str) -> None:
107126
print(f" {len(lines)} lines detected:")
108127
first_line_text = layout_to_text(lines[0].layout, text)
@@ -123,6 +142,16 @@ def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) ->
123142
print(f" Last token break type: {repr(last_token_break_type)}")
124143

125144

145+
def print_symbols(
    symbols: Sequence[documentai.Document.Page.Symbol], text: str
) -> None:
    """Print the symbol count followed by the first and last symbol's text.

    Args:
        symbols: Symbols to summarize; the OCR sample only calls this when
            `page.symbols` is non-empty.
        text: Full document text that each symbol layout's text anchor
            indexes into.
    """
    symbol_count = len(symbols)
    print(f" {symbol_count} symbols detected:")

    # Resolve and report the leading symbol before touching the trailing one.
    leading_symbol = symbols[0]
    leading_text = layout_to_text(leading_symbol.layout, text)
    print(f" First symbol text: {leading_text!r}")

    trailing_symbol = symbols[-1]
    trailing_text = layout_to_text(trailing_symbol.layout, text)
    print(f" Last symbol text: {trailing_text!r}")
153+
154+
126155
def print_image_quality_scores(
127156
image_quality_scores: documentai.Document.Page.ImageQualityScores,
128157
) -> None:
@@ -133,6 +162,19 @@ def print_image_quality_scores(
133162
print(f" {detected_defect.type_}: {detected_defect.confidence:.1%}")
134163

135164

165+
def print_styles(styles: Sequence[documentai.Document.Style], text: str) -> None:
    """Print how many text styles were detected and describe the first one.

    Args:
        styles: Style entries to summarize; the OCR sample passes
            `document.text_styles`.
        text: Full document text that each style's text anchor indexes into.
    """
    print(f" {len(styles)} styles detected:")
    if not styles:
        # Nothing to describe; avoid an IndexError on styles[0].
        return

    first_style = styles[0]
    # `Document.Style` stores its text span directly in `text_anchor` and has
    # no `layout` field, so the segments are sliced here rather than passed
    # through a layout-based helper.
    first_style_text = "".join(
        text[int(segment.start_index) : int(segment.end_index)]
        for segment in first_style.text_anchor.text_segments
    )
    print(f" First style text: {repr(first_style_text)}")
    print(f" Color: {first_style.color}")
    print(f" Background Color: {first_style.background_color}")
    print(f" Font Weight: {first_style.font_weight}")
    print(f" Text Style: {first_style.text_style}")
    print(f" Text Decoration: {first_style.text_decoration}")
    print(f" Font Size: {first_style.font_size.size}{first_style.font_size.unit}")
    print(f" Font Family: {first_style.font_family}")
176+
177+
136178
# [END documentai_process_ocr_document]
137179
# [START documentai_process_form_document]
138180
def process_document_form_sample(
@@ -179,6 +221,16 @@ def process_document_form_sample(
179221
value = layout_to_text(field.field_value, text)
180222
print(f" * {repr(name.strip())}: {repr(value.strip())}")
181223

224+
# Supported in version `pretrained-form-parser-v2.0-2022-11-10` and later.
225+
# For more information: https://cloud.google.com/document-ai/docs/form-parser
226+
if document.entities:
227+
print(f"Found {len(document.entities)} generic entities:")
228+
for entity in document.entities:
229+
print_entity(entity)
230+
# Print Nested Entities
231+
for prop in entity.properties:
232+
print_entity(prop)
233+
182234
return document
183235

184236

@@ -194,37 +246,6 @@ def print_table_rows(
194246

195247

196248
# [END documentai_process_form_document]
197-
# [START documentai_process_quality_document]
198-
def process_document_quality_sample(
199-
project_id: str,
200-
location: str,
201-
processor_id: str,
202-
processor_version: str,
203-
file_path: str,
204-
mime_type: str,
205-
) -> None:
206-
# Online processing request to Document AI
207-
document = process_document(
208-
project_id, location, processor_id, processor_version, file_path, mime_type
209-
)
210-
211-
# Read the quality-specific information from the output from the
212-
# Intelligent Document Quality Processor:
213-
# https://cloud.google.com/document-ai/docs/processors-list#processor_doc-quality-processor
214-
# OCR and other data is also present in the quality processor's response.
215-
# Please see the OCR and other samples for how to parse other data in the
216-
# response.
217-
for entity in document.entities:
218-
conf_percent = f"{entity.confidence:.1%}"
219-
page_num = str(int(entity.page_anchor.page_refs[0].page) + 1)
220-
print(f"\nPage {page_num} has a quality score of {conf_percent}")
221-
222-
for prop in entity.properties:
223-
conf_percent = f"{prop.confidence:.1%}"
224-
print(f" * {prop.type_} score of {conf_percent}")
225-
226-
227-
# [END documentai_process_quality_document]
228249

229250

230251
# [START documentai_process_specialized_document]
@@ -241,8 +262,7 @@ def process_document_specialized_sample(
241262
project_id, location, processor_id, processor_version, file_path, mime_type
242263
)
243264

244-
# Extract entities from a specialized document
245-
# Most specalized processors follow a similar pattern.
265+
# Print extracted entities from entity extraction processor output.
246266
# For a complete list of processors see:
247267
# https://cloud.google.com/document-ai/docs/processors-list
248268
#
@@ -257,6 +277,7 @@ def process_document_specialized_sample(
257277
print_entity(prop)
258278

259279

280+
# [START documentai_process_form_document]
260281
def print_entity(entity: documentai.Document.Entity) -> None:
261282
# Fields detected. For a full list of fields for each processor see
262283
# the processor documentation:
@@ -274,6 +295,7 @@ def print_entity(entity: documentai.Document.Entity) -> None:
274295
print(f" * Normalized Value: {repr(normalized_value)}")
275296

276297

298+
# [END documentai_process_form_document]
277299
# [END documentai_process_specialized_document]
278300

279301

@@ -316,22 +338,18 @@ def page_refs_to_string(
316338
page_refs: Sequence[documentai.Document.PageAnchor.PageRef],
317339
) -> str:
318340
"""Converts a page ref to a string describing the page or page range."""
319-
if len(page_refs) == 1:
320-
num = str(int(page_refs[0].page) + 1)
321-
return f"page {num} is"
322-
323-
nums = ""
324-
for page_ref in page_refs:
325-
nums += f"{int(page_ref.page) + 1}, "
326-
return f"pages {nums[:-2]} are"
341+
pages = [str(int(page_ref.page) + 1) for page_ref in page_refs]
342+
if len(pages) == 1:
343+
return f"page {pages[0]} is"
344+
else:
345+
return f"pages {', '.join(pages)} are"
327346

328347

329348
# [END documentai_process_splitter_document]
330349

331350

332351
# [START documentai_process_ocr_document]
333352
# [START documentai_process_form_document]
334-
# [START documentai_process_quality_document]
335353
# [START documentai_process_specialized_document]
336354
# [START documentai_process_splitter_document]
337355
def process_document(
@@ -341,6 +359,7 @@ def process_document(
341359
processor_version: str,
342360
file_path: str,
343361
mime_type: str,
362+
process_options: Optional[documentai.ProcessOptions] = None,
344363
) -> documentai.Document:
345364
# You must set the `api_endpoint` if you use a location other than "us".
346365
client = documentai.DocumentProcessorServiceClient(
@@ -364,6 +383,8 @@ def process_document(
364383
request = documentai.ProcessRequest(
365384
name=name,
366385
raw_document=documentai.RawDocument(content=image_content, mime_type=mime_type),
386+
# Only supported for Document OCR processor
387+
process_options=process_options,
367388
)
368389

369390
result = client.process_document(request=request)
@@ -373,7 +394,6 @@ def process_document(
373394
return result.document
374395

375396

376-
# [END documentai_process_quality_document]
377397
# [END documentai_process_specialized_document]
378398
# [END documentai_process_splitter_document]
379399
def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
@@ -382,14 +402,12 @@ def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
382402
offsets in the entirety of the document's text. This function converts
383403
offsets to a string.
384404
"""
385-
response = ""
386405
# If a text segment spans several lines, it will
387406
# be stored in different text segments.
388-
for segment in layout.text_anchor.text_segments:
389-
start_index = int(segment.start_index)
390-
end_index = int(segment.end_index)
391-
response += text[start_index:end_index]
392-
return response
407+
return "".join(
408+
text[int(segment.start_index) : int(segment.end_index)]
409+
for segment in layout.text_anchor.text_segments
410+
)
393411

394412

395413
# [END documentai_process_form_document]

documentai/snippets/handle_response_sample_test.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_process_document_ocr(capsys):
2424
location = "us"
2525
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
2626
processor_id = "52a38e080c1a7296"
27-
processor_version = "rc"
27+
processor_version = "pretrained-ocr-v1.0-2020-09-23"
2828
file_path = "resources/handwritten_form.pdf"
2929
mime_type = "application/pdf"
3030

@@ -63,17 +63,18 @@ def test_process_document_form():
6363
assert len(document.pages) == 1
6464
assert len(document.pages[0].tables[0].header_rows[0].cells) == 4
6565
assert len(document.pages[0].tables[0].body_rows) == 6
66+
assert len(document.entities) > 0
6667

6768

6869
def test_process_document_quality(capsys):
6970
location = "us"
7071
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
71-
processor_id = "7fcb597c523721b3"
72-
processor_version = "stable"
72+
processor_id = "52a38e080c1a7296"
73+
processor_version = "pretrained-ocr-v1.0-2020-09-23"
7374
poor_quality_file_path = "resources/document_quality_poor.pdf"
7475
mime_type = "application/pdf"
7576

76-
handle_response_sample.process_document_quality_sample(
77+
handle_response_sample.process_document_ocr_sample(
7778
project_id=project_id,
7879
location=location,
7980
processor_id=processor_id,
@@ -84,8 +85,8 @@ def test_process_document_quality(capsys):
8485
out, _ = capsys.readouterr()
8586

8687
expected_strings = [
87-
"Page 1 has a quality score of",
88-
"defect_blurry score of 9",
88+
"Quality score",
89+
"defect_blurry",
8990
"defect_noisy",
9091
]
9192
for expected_string in expected_strings:

documentai/snippets/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
google-cloud-documentai==2.15.0
1+
google-cloud-documentai==2.17.0
22
google-cloud-storage==2.9.0

0 commit comments

Comments
 (0)
0