15
15
16
16
# [START documentai_process_ocr_document]
17
17
# [START documentai_process_form_document]
18
- # [START documentai_process_quality_document]
19
18
# [START documentai_process_specialized_document]
20
19
# [START documentai_process_splitter_document]
21
- from typing import Sequence
20
+ from typing import Optional , Sequence
22
21
23
22
from google .api_core .client_options import ClientOptions
24
- from google .cloud import documentai # type: ignore
23
+ from google .cloud import documentai
25
24
26
25
# TODO(developer): Uncomment these variables before running the sample.
27
26
# project_id = "YOUR_PROJECT_ID"
34
33
35
34
# [END documentai_process_ocr_document]
36
35
# [END documentai_process_form_document]
37
- # [END documentai_process_quality_document]
38
36
# [END documentai_process_specialized_document]
39
37
# [END documentai_process_splitter_document]
40
38
@@ -48,9 +46,25 @@ def process_document_ocr_sample(
48
46
file_path : str ,
49
47
mime_type : str ,
50
48
) -> None :
49
+ # Optional: Additional configurations for Document OCR Processor.
50
+ # For more information: https://cloud.google.com/document-ai/docs/document-ocr
51
+ process_options = documentai .ProcessOptions (
52
+ ocr_config = documentai .OcrConfig (
53
+ compute_style_info = True ,
54
+ enable_native_pdf_parsing = True ,
55
+ enable_image_quality_scores = True ,
56
+ enable_symbol = True ,
57
+ )
58
+ )
51
59
# Online processing request to Document AI
52
60
document = process_document (
53
- project_id , location , processor_id , processor_version , file_path , mime_type
61
+ project_id ,
62
+ location ,
63
+ processor_id ,
64
+ processor_version ,
65
+ file_path ,
66
+ mime_type ,
67
+ process_options = process_options ,
54
68
)
55
69
56
70
text = document .text
@@ -61,15 +75,21 @@ def process_document_ocr_sample(
61
75
print (f"Page { page .page_number } :" )
62
76
print_page_dimensions (page .dimension )
63
77
print_detected_langauges (page .detected_languages )
64
- print_paragraphs ( page . paragraphs , text )
78
+
65
79
print_blocks (page .blocks , text )
80
+ print_paragraphs (page .paragraphs , text )
66
81
print_lines (page .lines , text )
67
82
print_tokens (page .tokens , text )
68
83
69
- # Currently supported in version `pretrained-ocr-v1.1-2022-09-12`
84
+ if page .symbols :
85
+ print_symbols (page .symbols , text )
86
+
70
87
if page .image_quality_scores :
71
88
print_image_quality_scores (page .image_quality_scores )
72
89
90
+ if document .text_styles :
91
+ print_styles (document .text_styles , text )
92
+
73
93
74
94
def print_page_dimensions (dimension : documentai .Document .Page .Dimension ) -> None :
75
95
print (f" Width: { str (dimension .width )} " )
@@ -81,8 +101,15 @@ def print_detected_langauges(
81
101
) -> None :
82
102
print (" Detected languages:" )
83
103
for lang in detected_languages :
84
- code = lang .language_code
85
- print (f" { code } ({ lang .confidence :.1%} confidence)" )
104
+ print (f" { lang .language_code } ({ lang .confidence :.1%} confidence)" )
105
+
106
+
107
def print_blocks(blocks: Sequence[documentai.Document.Page.Block], text: str) -> None:
    """Print how many blocks were detected plus the first and last block text.

    Args:
        blocks: Blocks detected on one page. May be empty, in which case
            only the count line is printed.
        text: Text that each block's layout offsets are resolved against
            (passed through to ``layout_to_text``).
    """
    print(f" {len(blocks)} blocks detected:")
    # Nothing to index into when the page produced no blocks.
    if not blocks:
        return
    first_block_text = layout_to_text(blocks[0].layout, text)
    print(f" First text block: {repr(first_block_text)}")
    last_block_text = layout_to_text(blocks[-1].layout, text)
    print(f" Last text block: {repr(last_block_text)}")
86
113
87
114
88
115
def print_paragraphs (
@@ -95,14 +122,6 @@ def print_paragraphs(
95
122
print (f" Last paragraph text: { repr (last_paragraph_text )} " )
96
123
97
124
98
- def print_blocks (blocks : Sequence [documentai .Document .Page .Block ], text : str ) -> None :
99
- print (f" { len (blocks )} blocks detected:" )
100
- first_block_text = layout_to_text (blocks [0 ].layout , text )
101
- print (f" First text block: { repr (first_block_text )} " )
102
- last_block_text = layout_to_text (blocks [- 1 ].layout , text )
103
- print (f" Last text block: { repr (last_block_text )} " )
104
-
105
-
106
125
def print_lines (lines : Sequence [documentai .Document .Page .Line ], text : str ) -> None :
107
126
print (f" { len (lines )} lines detected:" )
108
127
first_line_text = layout_to_text (lines [0 ].layout , text )
@@ -123,6 +142,16 @@ def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) ->
123
142
print (f" Last token break type: { repr (last_token_break_type )} " )
124
143
125
144
145
def print_symbols(
    symbols: Sequence[documentai.Document.Page.Symbol], text: str
) -> None:
    """Print how many symbols were detected plus the first and last symbol text.

    Args:
        symbols: Symbols detected on one page. May be empty, in which case
            only the count line is printed.
        text: Text that each symbol's layout offsets are resolved against
            (passed through to ``layout_to_text``).
    """
    print(f" {len(symbols)} symbols detected:")
    # Guard the first/last lookups so an empty sequence is not an IndexError.
    if not symbols:
        return
    first_symbol_text = layout_to_text(symbols[0].layout, text)
    print(f" First symbol text: {repr(first_symbol_text)}")
    last_symbol_text = layout_to_text(symbols[-1].layout, text)
    print(f" Last symbol text: {repr(last_symbol_text)}")
153
+
154
+
126
155
def print_image_quality_scores (
127
156
image_quality_scores : documentai .Document .Page .ImageQualityScores ,
128
157
) -> None :
@@ -133,6 +162,19 @@ def print_image_quality_scores(
133
162
print (f" { detected_defect .type_ } : { detected_defect .confidence :.1%} " )
134
163
135
164
165
def print_styles(styles: Sequence[documentai.Document.Style], text: str) -> None:
    """Print how many text styles were detected and describe the first one.

    Args:
        styles: Document-level text styles. May be empty, in which case only
            the count line is printed.
        text: Text that the style's text-anchor offsets index into.
    """
    print(f" {len(styles)} styles detected:")
    if not styles:
        return

    first_style = styles[0]
    # A Style carries its text span in `text_anchor` directly; it has no
    # `layout` field, so the segments are resolved here instead of going
    # through a Layout-based helper.
    first_style_text = "".join(
        text[int(segment.start_index) : int(segment.end_index)]
        for segment in first_style.text_anchor.text_segments
    )
    print(f" First style text: {repr(first_style_text)}")
    print(f" Color: {first_style.color}")
    print(f" Background Color: {first_style.background_color}")
    print(f" Font Weight: {first_style.font_weight}")
    print(f" Text Style: {first_style.text_style}")
    print(f" Text Decoration: {first_style.text_decoration}")
    print(f" Font Size: {first_style.font_size.size}{first_style.font_size.unit}")
    print(f" Font Family: {first_style.font_family}")
176
+
177
+
136
178
# [END documentai_process_ocr_document]
137
179
# [START documentai_process_form_document]
138
180
def process_document_form_sample (
@@ -179,6 +221,16 @@ def process_document_form_sample(
179
221
value = layout_to_text (field .field_value , text )
180
222
print (f" * { repr (name .strip ())} : { repr (value .strip ())} " )
181
223
224
+ # Supported in version `pretrained-form-parser-v2.0-2022-11-10` and later.
225
+ # For more information: https://cloud.google.com/document-ai/docs/form-parser
226
+ if document .entities :
227
+ print (f"Found { len (document .entities )} generic entities:" )
228
+ for entity in document .entities :
229
+ print_entity (entity )
230
+ # Print Nested Entities
231
+ for prop in entity .properties :
232
+ print_entity (prop )
233
+
182
234
return document
183
235
184
236
@@ -194,37 +246,6 @@ def print_table_rows(
194
246
195
247
196
248
# [END documentai_process_form_document]
197
- # [START documentai_process_quality_document]
198
- def process_document_quality_sample (
199
- project_id : str ,
200
- location : str ,
201
- processor_id : str ,
202
- processor_version : str ,
203
- file_path : str ,
204
- mime_type : str ,
205
- ) -> None :
206
- # Online processing request to Document AI
207
- document = process_document (
208
- project_id , location , processor_id , processor_version , file_path , mime_type
209
- )
210
-
211
- # Read the quality-specific information from the output from the
212
- # Intelligent Document Quality Processor:
213
- # https://cloud.google.com/document-ai/docs/processors-list#processor_doc-quality-processor
214
- # OCR and other data is also present in the quality processor's response.
215
- # Please see the OCR and other samples for how to parse other data in the
216
- # response.
217
- for entity in document .entities :
218
- conf_percent = f"{ entity .confidence :.1%} "
219
- page_num = str (int (entity .page_anchor .page_refs [0 ].page ) + 1 )
220
- print (f"\n Page { page_num } has a quality score of { conf_percent } " )
221
-
222
- for prop in entity .properties :
223
- conf_percent = f"{ prop .confidence :.1%} "
224
- print (f" * { prop .type_ } score of { conf_percent } " )
225
-
226
-
227
- # [END documentai_process_quality_document]
228
249
229
250
230
251
# [START documentai_process_specialized_document]
@@ -241,8 +262,7 @@ def process_document_specialized_sample(
241
262
project_id , location , processor_id , processor_version , file_path , mime_type
242
263
)
243
264
244
- # Extract entities from a specialized document
245
- # Most specalized processors follow a similar pattern.
265
+ # Print extracted entities from entity extraction processor output.
246
266
# For a complete list of processors see:
247
267
# https://cloud.google.com/document-ai/docs/processors-list
248
268
#
@@ -257,6 +277,7 @@ def process_document_specialized_sample(
257
277
print_entity (prop )
258
278
259
279
280
+ # [START documentai_process_form_document]
260
281
def print_entity (entity : documentai .Document .Entity ) -> None :
261
282
# Fields detected. For a full list of fields for each processor see
262
283
# the processor documentation:
@@ -274,6 +295,7 @@ def print_entity(entity: documentai.Document.Entity) -> None:
274
295
print (f" * Normalized Value: { repr (normalized_value )} " )
275
296
276
297
298
+ # [END documentai_process_form_document]
277
299
# [END documentai_process_specialized_document]
278
300
279
301
@@ -316,22 +338,18 @@ def page_refs_to_string(
316
338
page_refs : Sequence [documentai .Document .PageAnchor .PageRef ],
317
339
) -> str :
318
340
"""Converts a page ref to a string describing the page or page range."""
319
- if len (page_refs ) == 1 :
320
- num = str (int (page_refs [0 ].page ) + 1 )
321
- return f"page { num } is"
322
-
323
- nums = ""
324
- for page_ref in page_refs :
325
- nums += f"{ int (page_ref .page ) + 1 } , "
326
- return f"pages { nums [:- 2 ]} are"
341
+ pages = [str (int (page_ref .page ) + 1 ) for page_ref in page_refs ]
342
+ if len (pages ) == 1 :
343
+ return f"page { pages [0 ]} is"
344
+ else :
345
+ return f"pages { ', ' .join (pages )} are"
327
346
328
347
329
348
# [END documentai_process_splitter_document]
330
349
331
350
332
351
# [START documentai_process_ocr_document]
333
352
# [START documentai_process_form_document]
334
- # [START documentai_process_quality_document]
335
353
# [START documentai_process_specialized_document]
336
354
# [START documentai_process_splitter_document]
337
355
def process_document (
@@ -341,6 +359,7 @@ def process_document(
341
359
processor_version : str ,
342
360
file_path : str ,
343
361
mime_type : str ,
362
+ process_options : Optional [documentai .ProcessOptions ] = None ,
344
363
) -> documentai .Document :
345
364
# You must set the `api_endpoint` if you use a location other than "us".
346
365
client = documentai .DocumentProcessorServiceClient (
@@ -364,6 +383,8 @@ def process_document(
364
383
request = documentai .ProcessRequest (
365
384
name = name ,
366
385
raw_document = documentai .RawDocument (content = image_content , mime_type = mime_type ),
386
+ # Only supported for Document OCR processor
387
+ process_options = process_options ,
367
388
)
368
389
369
390
result = client .process_document (request = request )
@@ -373,7 +394,6 @@ def process_document(
373
394
return result .document
374
395
375
396
376
- # [END documentai_process_quality_document]
377
397
# [END documentai_process_specialized_document]
378
398
# [END documentai_process_splitter_document]
379
399
def layout_to_text (layout : documentai .Document .Page .Layout , text : str ) -> str :
@@ -382,14 +402,12 @@ def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
382
402
offsets in the entirety of the document's text. This function converts
383
403
offsets to a string.
384
404
"""
385
- response = ""
386
405
# If a text segment spans several lines, it will
387
406
# be stored in different text segments.
388
- for segment in layout .text_anchor .text_segments :
389
- start_index = int (segment .start_index )
390
- end_index = int (segment .end_index )
391
- response += text [start_index :end_index ]
392
- return response
407
+ return "" .join (
408
+ text [int (segment .start_index ) : int (segment .end_index )]
409
+ for segment in layout .text_anchor .text_segments
410
+ )
393
411
394
412
395
413
# [END documentai_process_form_document]
0 commit comments