feat: add `symbols` field, and auto-format comments (#277) · googleapis/google-cloud-python@274d991 · GitHub
[go: up one dir, main page]

Skip to content

Commit 274d991

Browse files
feat: add `symbols` field, and auto-format comments (#277)
* feat: add `symbols` field, and auto-format comments
  PiperOrigin-RevId: 428799963
  Source-Link: googleapis/googleapis@8b6181f
  Source-Link: googleapis/googleapis-gen@2c320a9
  Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMmMzMjBhOTU2MTYwZWU4NmNiOGQ3OTIzZTM3YTFhM2E0NTFkMTA0MiJ9
* 🦉 Updates from OwlBot
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* feat: add `symbols` field, and auto-format comments
  PiperOrigin-RevId: 428800506
  Source-Link: googleapis/googleapis@ea20380
  Source-Link: googleapis/googleapis-gen@8b9ece5
  Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiOGI5ZWNlNTA4YzEzMjUxNmY0YmY3OWU0NjJmMDgyNTNhY2M4NTkzNSJ9
* 🦉 Updates from OwlBot
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 4db5614 commit 274d991

File tree

2 files changed

+108
-41
lines changed
  • packages/google-cloud-documentai/google/cloud

2 files changed

+108
-41
lines changed

packages/google-cloud-documentai/google/cloud/documentai_v1/types/document.py

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,9 @@ class Page(proto.Message):
224224
form_fields (Sequence[google.cloud.documentai_v1.types.Document.Page.FormField]):
225225
A list of visually detected form fields on
226226
the page.
227+
symbols (Sequence[google.cloud.documentai_v1.types.Document.Page.Symbol]):
228+
A list of visually detected symbols on the
229+
page.
227230
provenance (google.cloud.documentai_v1.types.Document.Provenance):
228231
The history of this page.
229232
"""
@@ -454,6 +457,26 @@ class Type(proto.Enum):
454457
proto.MESSAGE, number=4, message="Document.Provenance",
455458
)
456459

460+
class Symbol(proto.Message):
461+
r"""A detected symbol.
462+
463+
Attributes:
464+
layout (google.cloud.documentai_v1.types.Document.Page.Layout):
465+
[Layout][google.cloud.documentai.v1.Document.Page.Layout]
466+
for
467+
[Symbol][google.cloud.documentai.v1.Document.Page.Symbol].
468+
detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]):
469+
A list of detected languages together with
470+
confidence.
471+
"""
472+
473+
layout = proto.Field(
474+
proto.MESSAGE, number=1, message="Document.Page.Layout",
475+
)
476+
detected_languages = proto.RepeatedField(
477+
proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage",
478+
)
479+
457480
class VisualElement(proto.Message):
458481
r"""Detected non-text visual elements e.g. checkbox, signature
459482
etc. on the page.
@@ -602,7 +625,7 @@ class DetectedLanguage(proto.Message):
602625
language_code (str):
603626
The BCP-47 language code, such as "en-US" or "sr-Latn". For
604627
more information, see
605-
http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
628+
https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
606629
confidence (float):
607630
Confidence of detected language. Range [0, 1].
608631
"""
@@ -643,13 +666,17 @@ class DetectedLanguage(proto.Message):
643666
form_fields = proto.RepeatedField(
644667
proto.MESSAGE, number=11, message="Document.Page.FormField",
645668
)
669+
symbols = proto.RepeatedField(
670+
proto.MESSAGE, number=12, message="Document.Page.Symbol",
671+
)
646672
provenance = proto.Field(
647673
proto.MESSAGE, number=16, message="Document.Provenance",
648674
)
649675

650676
class Entity(proto.Message):
651-
r"""A phrase in the text that is a known entity type, such as a
652-
person, an organization, or location.
677+
r"""An entity that could be a phrase in the text or a property
678+
that belongs to the document. It is a known entity type, such as
679+
a person, an organization, or location.
653680
654681
Attributes:
655682
text_anchor (google.cloud.documentai_v1.types.Document.TextAnchor):
@@ -660,7 +687,8 @@ class Entity(proto.Message):
660687
Entity type from a schema e.g. ``Address``.
661688
mention_text (str):
662689
Optional. Text value in the document e.g.
663-
``1600 Amphitheatre Pkwy``.
690+
``1600 Amphitheatre Pkwy``. If the entity is not present in
691+
the document, this field will be empty.
664692
mention_id (str):
665693
Optional. Deprecated. Use ``id`` field instead.
666694
confidence (float):
@@ -729,10 +757,14 @@ class NormalizedValue(proto.Message):
729757
730758
This field is a member of `oneof`_ ``structured_value``.
731759
text (str):
732-
Required. Normalized entity value stored as a string. This
733-
field is populated for supported document type (e.g.
734-
Invoice). For some entity types, one of respective
735-
'structured_value' fields may also be populated.
760+
Optional. An optional field to store a normalized string.
761+
For some entity types, one of respective
762+
``structured_value`` fields may also be populated. Also not
763+
all the types of ``structured_value`` will be normalized.
764+
For example, some processors may not generate float or int
765+
normalized text by default.
766+
767+
Below are sample formats mapped to structured values.
736768
737769
- Money/Currency type (``money_value``) is in the ISO 4217
738770
text format.
@@ -818,7 +850,8 @@ class TextAnchor(proto.Message):
818850
[Document.text][google.cloud.documentai.v1.Document.text].
819851
content (str):
820852
Contains the content of the text span so that users do not
821-
have to look it up in the text_segments.
853+
have to look it up in the text_segments. It is always
854+
populated for formFields.
822855
"""
823856

824857
class TextSegment(proto.Message):
@@ -942,18 +975,18 @@ class OperationType(proto.Enum):
942975
EVAL_SKIPPED = 6
943976

944977
class Parent(proto.Message):
945-
r"""Structure for referencing parent provenances. When an
946-
element replaces one of more other elements parent references
947-
identify the elements that are replaced.
978+
r"""The parent element the current element is based on. Used for
979+
referencing/aligning, removal and replacement operations.
948980
949981
Attributes:
950982
revision (int):
951-
The index of the [Document.revisions] identifying the parent
952-
revision.
983+
The index of the index into current revision's parent_ids
984+
list.
953985
index (int):
954-
The index of the parent revisions
955-
corresponding collection of items (eg. list of
956-
entities, properties within entities, etc.)
986+
The index of the parent item in the
987+
corresponding item list (eg. list of entities,
988+
properties within entities, etc.) in the parent
989+
revision.
957990
id (int):
958991
The id of the parent provenance.
959992
"""

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document.py

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class Document(proto.Message):
6868
Optional. UTF-8 encoded text in reading order
6969
from the document.
7070
text_styles (Sequence[google.cloud.documentai_v1beta3.types.Document.Style]):
71-
Styles for the
71+
Placeholder. Styles for the
7272
[Document.text][google.cloud.documentai.v1beta3.Document.text].
7373
pages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page]):
7474
Visual page layout for the
@@ -79,13 +79,13 @@ class Document(proto.Message):
7979
For document shards, entities in this list may cross shard
8080
boundaries.
8181
entity_relations (Sequence[google.cloud.documentai_v1beta3.types.Document.EntityRelation]):
82-
Relationship among
82+
Placeholder. Relationship among
8383
[Document.entities][google.cloud.documentai.v1beta3.Document.entities].
8484
text_changes (Sequence[google.cloud.documentai_v1beta3.types.Document.TextChange]):
85-
A list of text corrections made to [Document.text]. This is
86-
usually used for annotating corrections to OCR mistakes.
87-
Text changes for a given revision may not overlap with each
88-
other.
85+
Placeholder. A list of text corrections made to
86+
[Document.text]. This is usually used for annotating
87+
corrections to OCR mistakes. Text changes for a given
88+
revision may not overlap with each other.
8989
shard_info (google.cloud.documentai_v1beta3.types.Document.ShardInfo):
9090
Information about the sharding if this
9191
document is sharded part of a larger document.
@@ -95,7 +95,8 @@ class Document(proto.Message):
9595
Any error that occurred while processing this
9696
document.
9797
revisions (Sequence[google.cloud.documentai_v1beta3.types.Document.Revision]):
98-
Revision history of this document.
98+
Placeholder. Revision history of this
99+
document.
99100
"""
100101

101102
class ShardInfo(proto.Message):
@@ -224,6 +225,9 @@ class Page(proto.Message):
224225
form_fields (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.FormField]):
225226
A list of visually detected form fields on
226227
the page.
228+
symbols (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Symbol]):
229+
A list of visually detected symbols on the
230+
page.
227231
provenance (google.cloud.documentai_v1beta3.types.Document.Provenance):
228232
The history of this page.
229233
"""
@@ -457,6 +461,26 @@ class Type(proto.Enum):
457461
proto.MESSAGE, number=4, message="Document.Provenance",
458462
)
459463

464+
class Symbol(proto.Message):
465+
r"""A detected symbol.
466+
467+
Attributes:
468+
layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout):
469+
[Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]
470+
for
471+
[Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
472+
detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]):
473+
A list of detected languages together with
474+
confidence.
475+
"""
476+
477+
layout = proto.Field(
478+
proto.MESSAGE, number=1, message="Document.Page.Layout",
479+
)
480+
detected_languages = proto.RepeatedField(
481+
proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage",
482+
)
483+
460484
class VisualElement(proto.Message):
461485
r"""Detected non-text visual elements e.g. checkbox, signature
462486
etc. on the page.
@@ -606,7 +630,7 @@ class DetectedLanguage(proto.Message):
606630
language_code (str):
607631
The BCP-47 language code, such as "en-US" or "sr-Latn". For
608632
more information, see
609-
http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
633+
https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
610634
confidence (float):
611635
Confidence of detected language. Range [0, 1].
612636
"""
@@ -647,13 +671,17 @@ class DetectedLanguage(proto.Message):
647671
form_fields = proto.RepeatedField(
648672
proto.MESSAGE, number=11, message="Document.Page.FormField",
649673
)
674+
symbols = proto.RepeatedField(
675+
proto.MESSAGE, number=12, message="Document.Page.Symbol",
676+
)
650677
provenance = proto.Field(
651678
proto.MESSAGE, number=16, message="Document.Provenance",
652679
)
653680

654681
class Entity(proto.Message):
655-
r"""A phrase in the text that is a known entity type, such as a
656-
person, an organization, or location.
682+
r"""An entity that could be a phrase in the text or a property
683+
that belongs to the document. It is a known entity type, such as
684+
a person, an organization, or location.
657685
658686
Attributes:
659687
text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor):
@@ -664,7 +692,8 @@ class Entity(proto.Message):
664692
Entity type from a schema e.g. ``Address``.
665693
mention_text (str):
666694
Optional. Text value in the document e.g.
667-
``1600 Amphitheatre Pkwy``.
695+
``1600 Amphitheatre Pkwy``. If the entity is not present in
696+
the document, this field will be empty.
668697
mention_id (str):
669698
Optional. Deprecated. Use ``id`` field instead.
670699
confidence (float):
@@ -733,10 +762,14 @@ class NormalizedValue(proto.Message):
733762
734763
This field is a member of `oneof`_ ``structured_value``.
735764
text (str):
736-
Required. Normalized entity value stored as a string. This
737-
field is populated for supported document type (e.g.
738-
Invoice). For some entity types, one of respective
739-
'structured_value' fields may also be populated.
765+
Optional. An optional field to store a normalized string.
766+
For some entity types, one of respective
767+
``structured_value`` fields may also be populated. Also not
768+
all the types of ``structured_value`` will be normalized.
769+
For example, some processors may not generate float or int
770+
normalized text by default.
771+
772+
Below are sample formats mapped to structured values.
740773
741774
- Money/Currency type (``money_value``) is in the ISO 4217
742775
text format.
@@ -822,7 +855,8 @@ class TextAnchor(proto.Message):
822855
[Document.text][google.cloud.documentai.v1beta3.Document.text].
823856
content (str):
824857
Contains the content of the text span so that users do not
825-
have to look it up in the text_segments.
858+
have to look it up in the text_segments. It is always
859+
populated for formFields.
826860
"""
827861

828862
class TextSegment(proto.Message):
@@ -946,18 +980,18 @@ class OperationType(proto.Enum):
946980
EVAL_SKIPPED = 6
947981

948982
class Parent(proto.Message):
949-
r"""Structure for referencing parent provenances. When an
950-
element replaces one of more other elements parent references
951-
identify the elements that are replaced.
983+
r"""The parent element the current element is based on. Used for
984+
referencing/aligning, removal and replacement operations.
952985
953986
Attributes:
954987
revision (int):
955-
The index of the [Document.revisions] identifying the parent
956-
revision.
988+
The index of the index into current revision's parent_ids
989+
list.
957990
index (int):
958-
The index of the parent revisions
959-
corresponding collection of items (eg. list of
960-
entities, properties within entities, etc.)
991+
The index of the parent item in the
992+
corresponding item list (eg. list of entities,
993+
properties within entities, etc.) in the parent
994+
revision.
961995
id (int):
962996
The id of the parent provenance.
963997
"""

0 commit comments

Comments
 (0)
0