@@ -68,7 +68,7 @@ class Document(proto.Message):
68
68
Optional. UTF-8 encoded text in reading order
69
69
from the document.
70
70
text_styles (Sequence[google.cloud.documentai_v1beta3.types.Document.Style]):
71
- Styles for the
71
+ Placeholder. Styles for the
72
72
[Document.text][google.cloud.documentai.v1beta3.Document.text].
73
73
pages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page]):
74
74
Visual page layout for the
@@ -79,13 +79,13 @@ class Document(proto.Message):
79
79
For document shards, entities in this list may cross shard
80
80
boundaries.
81
81
entity_relations (Sequence[google.cloud.documentai_v1beta3.types.Document.EntityRelation]):
82
- Relationship among
82
+ Placeholder. Relationship among
83
83
[Document.entities][google.cloud.documentai.v1beta3.Document.entities].
84
84
text_changes (Sequence[google.cloud.documentai_v1beta3.types.Document.TextChange]):
85
- A list of text corrections made to [Document.text]. This is
86
- usually used for annotating corrections to OCR mistakes.
87
- Text changes for a given revision may not overlap with each
88
- other.
85
+ Placeholder. A list of text corrections made to
86
+ [Document.text]. This is usually used for annotating
87
+ corrections to OCR mistakes. Text changes for a given
88
+ revision may not overlap with each other.
89
89
shard_info (google.cloud.documentai_v1beta3.types.Document.ShardInfo):
90
90
Information about the sharding if this
91
91
document is a sharded part of a larger document.
@@ -95,7 +95,8 @@ class Document(proto.Message):
95
95
Any error that occurred while processing this
96
96
document.
97
97
revisions (Sequence[google.cloud.documentai_v1beta3.types.Document.Revision]):
98
- Revision history of this document.
98
+ Placeholder. Revision history of this
99
+ document.
99
100
"""
100
101
101
102
class ShardInfo (proto .Message ):
@@ -224,6 +225,9 @@ class Page(proto.Message):
224
225
form_fields (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.FormField]):
225
226
A list of visually detected form fields on
226
227
the page.
228
+ symbols (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Symbol]):
229
+ A list of visually detected symbols on the
230
+ page.
227
231
provenance (google.cloud.documentai_v1beta3.types.Document.Provenance):
228
232
The history of this page.
229
233
"""
@@ -457,6 +461,26 @@ class Type(proto.Enum):
457
461
proto .MESSAGE , number = 4 , message = "Document.Provenance" ,
458
462
)
459
463
464
+ class Symbol (proto .Message ):
465
+ r"""A detected symbol.
466
+
467
+ Attributes:
468
+ layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout):
469
+ [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]
470
+ for
471
+ [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
472
+ detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]):
473
+ A list of detected languages together with
474
+ confidence.
475
+ """
476
+
477
+ layout = proto .Field (
478
+ proto .MESSAGE , number = 1 , message = "Document.Page.Layout" ,
479
+ )
480
+ detected_languages = proto .RepeatedField (
481
+ proto .MESSAGE , number = 2 , message = "Document.Page.DetectedLanguage" ,
482
+ )
483
+
460
484
class VisualElement (proto .Message ):
461
485
r"""Detected non-text visual elements e.g. checkbox, signature
462
486
etc. on the page.
@@ -606,7 +630,7 @@ class DetectedLanguage(proto.Message):
606
630
language_code (str):
607
631
The BCP-47 language code, such as "en-US" or "sr-Latn". For
608
632
more information, see
609
- http ://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
633
+ https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
610
634
confidence (float):
611
635
Confidence of detected language. Range [0, 1].
612
636
"""
@@ -647,13 +671,17 @@ class DetectedLanguage(proto.Message):
647
671
form_fields = proto .RepeatedField (
648
672
proto .MESSAGE , number = 11 , message = "Document.Page.FormField" ,
649
673
)
674
+ symbols = proto .RepeatedField (
675
+ proto .MESSAGE , number = 12 , message = "Document.Page.Symbol" ,
676
+ )
650
677
provenance = proto .Field (
651
678
proto .MESSAGE , number = 16 , message = "Document.Provenance" ,
652
679
)
653
680
654
681
class Entity (proto .Message ):
655
- r"""A phrase in the text that is a known entity type, such as a
656
- person, an organization, or location.
682
+ r"""An entity that could be a phrase in the text or a property
683
+ that belongs to the document. It is a known entity type, such as
684
+ a person, an organization, or location.
657
685
658
686
Attributes:
659
687
text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor):
@@ -664,7 +692,8 @@ class Entity(proto.Message):
664
692
Entity type from a schema e.g. ``Address``.
665
693
mention_text (str):
666
694
Optional. Text value in the document e.g.
667
- ``1600 Amphitheatre Pkwy``.
695
+ ``1600 Amphitheatre Pkwy``. If the entity is not present in
696
+ the document, this field will be empty.
668
697
mention_id (str):
669
698
Optional. Deprecated. Use ``id`` field instead.
670
699
confidence (float):
@@ -733,10 +762,14 @@ class NormalizedValue(proto.Message):
733
762
734
763
This field is a member of `oneof`_ ``structured_value``.
735
764
text (str):
736
- Required. Normalized entity value stored as a string. This
737
- field is populated for supported document type (e.g.
738
- Invoice). For some entity types, one of respective
739
- 'structured_value' fields may also be populated.
765
+ Optional. An optional field to store a normalized string.
766
+ For some entity types, one of the respective
767
+ ``structured_value`` fields may also be populated. Also not
768
+ all the types of ``structured_value`` will be normalized.
769
+ For example, some processors may not generate float or int
770
+ normalized text by default.
771
+
772
+ Below are sample formats mapped to structured values.
740
773
741
774
- Money/Currency type (``money_value``) is in the ISO 4217
742
775
text format.
@@ -822,7 +855,8 @@ class TextAnchor(proto.Message):
822
855
[Document.text][google.cloud.documentai.v1beta3.Document.text].
823
856
content (str):
824
857
Contains the content of the text span so that users do not
825
- have to look it up in the text_segments.
858
+ have to look it up in the text_segments. It is always
859
+ populated for formFields.
826
860
"""
827
861
828
862
class TextSegment (proto .Message ):
@@ -946,18 +980,18 @@ class OperationType(proto.Enum):
946
980
EVAL_SKIPPED = 6
947
981
948
982
class Parent (proto .Message ):
949
- r"""Structure for referencing parent provenances. When an
950
- element replaces one of more other elements parent references
951
- identify the elements that are replaced.
983
+ r"""The parent element the current element is based on. Used for
984
+ referencing/aligning, removal and replacement operations.
952
985
953
986
Attributes:
954
987
revision (int):
955
- The index of the [Document.revisions] identifying the parent
956
- revision .
988
+ The index into the current revision's parent_ids
989
+ list.
957
990
index (int):
958
- The index of the parent revisions
959
- corresponding collection of items (eg. list of
960
- entities, properties within entities, etc.)
991
+ The index of the parent item in the
992
+ corresponding item list (eg. list of entities,
993
+ properties within entities, etc.) in the parent
994
+ revision.
961
995
id (int):
962
996
The id of the parent provenance.
963
997
"""
0 commit comments