8000 feat: Extended DB API parameter syntax to optionally provide paramete… · googleapis/python-bigquery@8bcf397 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8bcf397

Browse files
Jim Fultontswast
andauthored
feat: Extended DB API parameter syntax to optionally provide parameter types (#626)
* Added explicit type documentation. * Extended query-parameter system for specifying parameter types.assed. * Serialize non-floats (e.g. Decimals) using in FLOAT64 parameters. Co-authored-by: Tim Swast <swast@google.com> * De-reference aliases in SqlParameterScalarTypes when checking types Co-authored-by: Tim Swast <swast@google.com>
1 parent c085186 commit 8bcf397

File tree

8 files changed

+396
-60
lines changed

8 files changed

+396
-60
lines changed

docs/dbapi.rst

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,40 @@ DB-API Reference
44
.. automodule:: google.cloud.bigquery.dbapi
55
:members:
66
:show-inheritance:
7+
8+
9+
DB-API Query-Parameter Syntax
10+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11+
12+
The BigQuery DB-API uses the `qmark` `parameter style
13+
<https://www.python.org/dev/peps/pep-0249/#paramstyle>`_ for
14+
unnamed/positional parameters and the `pyformat` parameter style for
15+
named parameters.
16+
17+
An example of a query using unnamed parameters::
18+
19+
insert into people (name, income) values (?, ?)
20+
21+
and using named parameters::
22+
23+
insert into people (name, income) values (%(name)s, %(income)s)
24+
25+
Providing explicit type information
26+
-----------------------------------
27+
28+
BigQuery requires type information for parameters. The The BigQuery
29+
DB-API can usually determine parameter types for parameters based on
30+
provided values. Sometimes, however, types can't be determined (for
31+
example when `None` is passed) or are determined incorrectly (for
32+
example when passing a floating-point value to a numeric column).
33+
34+
The BigQuery DB-API provides an extended parameter syntax. For named
35+
parameters, a BigQuery type is provided after the name separated by a
36+
colon, as in::
37+
38+
insert into people (name, income) values (%(name:string)s, %(income:numeric)s)
39+
40+
For unnamed parameters, use the named syntax with a type, but now
41+
name, as in::
42+
43+
insert into people (name, income) values (%(:string)s, %(:numeric)s)

google/cloud/bigquery/_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def _int_to_json(value):
275275

276276
def _float_to_json(value):
277277
"""Coerce 'value' to an JSON-compatible representation."""
278-
return value
278+
return value if value is None else float(value)
279279

280280

281281
def _decimal_to_json(value):

google/cloud/bigquery/dbapi/_helpers.py

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,36 @@
2020
import numbers
2121

2222
from google.cloud import bigquery
23-
from google.cloud.bigquery import table
23+
from google.cloud.bigquery import table, enums
2424
from google.cloud.bigquery.dbapi import exceptions
2525

2626

2727
_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28")
2828
_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28")
2929

3030

31-
def scalar_to_query_parameter(value, name=None):
31+
def _parameter_type(name, value, query_parameter_type=None, value_doc=""):
32+
if query_parameter_type:
33+
try:
34+
parameter_type = getattr(
35+
enums.SqlParameterScalarTypes, query_parameter_type.upper()
36+
)._type
37+
except AttributeError:
38+
raise exceptions.ProgrammingError(
39+
f"The given parameter type, {query_parameter_type},"
40+
f" for {name} is not a valid BigQuery scalar type."
41+
)
42+
else:
43+
parameter_type = bigquery_scalar_type(value)
44+
if parameter_type is None:
45+
raise exceptions.ProgrammingError(
46+
f"Encountered parameter {name} with "
47+
f"{value_doc} value {value} of unexpected type."
48+
)
49+
return parameter_type
50+
51+
52+
def scalar_to_query_parameter(value, name=None, query_parameter_type=None):
3253
"""Convert a scalar value into a query parameter.
3354
3455
Args:
@@ -37,6 +58,7 @@ def scalar_to_query_parameter(value, name=None):
3758
3859
name (str):
39< A93C /code>60
(Optional) Name of the query parameter.
61+
query_parameter_type (Optional[str]): Given type for the parameter.
4062
4163
Returns:
4264
google.cloud.bigquery.ScalarQueryParameter:
@@ -47,24 +69,19 @@ def scalar_to_query_parameter(value, name=None):
4769
google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
4870
if the type cannot be determined.
4971
"""
50-
parameter_type = bigquery_scalar_type(value)
51-
52-
if parameter_type is None:
53-
raise exceptions.ProgrammingError(
54-
"encountered parameter {} with value {} of unexpected type".format(
55-
name, value
56-
)
57-
)
58-
return bigquery.ScalarQueryParameter(name, parameter_type, value)
72+
return bigquery.ScalarQueryParameter(
73+
name, _parameter_type(name, value, query_parameter_type), value
74+
)
5975

6076

61-
def array_to_query_parameter(value, name=None):
77+
def array_to_query_parameter(value, name=None, query_parameter_type=None):
6278
"""Convert an array-like value into a query parameter.
6379
6480
Args:
6581
value (Sequence[Any]): The elements of the array (should not be a
6682
string-like Sequence).
6783
name (Optional[str]): Name of the query parameter.
84+
query_parameter_type (Optional[str]): Given type for the parameter.
6885
6986
Returns:
7087
A query parameter corresponding with the type and value of the plain
@@ -80,53 +97,58 @@ def array_to_query_parameter(value, name=None):
8097
"not string-like.".format(name)
8198
)
8299

83-
if not value:
100+
if query_parameter_type or value:
101+
array_type = _parameter_type(
102+
name,
103+
value[0] if value else None,
104+
query_parameter_type,
105+
value_doc="array element ",
106+
)
107+
else:
84108
rai A93C se exceptions.ProgrammingError(
85109
"Encountered an empty array-like value of parameter {}, cannot "
86110
"determine array elements type.".format(name)
87111
)
88112

89-
# Assume that all elements are of the same type, and let the backend handle
90-
# any type incompatibilities among the array elements
91-
array_type = bigquery_scalar_type(value[0])
92-
if array_type is None:
93-
raise exceptions.ProgrammingError(
94-
"Encountered unexpected first array element of parameter {}, "
95-
"cannot determine array elements type.".format(name)
96-
)
97-
98113
return bigquery.ArrayQueryParameter(name, array_type, value)
99114

100115

101-
def to_query_parameters_list(parameters):
116+
def to_query_parameters_list(parameters, parameter_types):
102117
"""Converts a sequence of parameter values into query parameters.
103118
104119
Args:
105120
parameters (Sequence[Any]): Sequence of query parameter values.
121+
parameter_types:
122+
A list of parameter types, one for each parameter.
123+
Unknown types are provided as None.
106124
107125
Returns:
108126
List[google.cloud.bigquery.query._AbstractQueryParameter]:
109127
A list of query parameters.
110128
"""
111129
result = []
112130

113-
for value in parameters:
131+
for value, type_ in zip(parameters, parameter_types):
114132
if isinstance(value, collections_abc.Mapping):
115133
raise NotImplementedError("STRUCT-like parameter values are not supported.")
116134
elif array_like(value):
117-
param = array_to_query_parameter(value)
135+
param = array_to_query_parameter(value, None, type_)
118136
else:
119-
param = scalar_to_query_parameter(value)
137+
param = scalar_to_query_parameter(value, None, type_)
138+
120139
result.append(param)
121140

122141
return result
123142

124143

125-
def to_query_parameters_dict(parameters):
144+
def to_query_parameters_dict(parameters, query_parameter_types):
126145
"""Converts a dictionary of parameter values into query parameters.
127146
128147
Args:
129148
parameters (Mapping[str, Any]): Dictionary of query parameter values.
149+
parameter_types:
150+
A dictionary of parameter types. It needn't have a key for each
151+
parameter.
130152
131153
Returns:
132154
List[google.cloud.bigquery.query._AbstractQueryParameter]:
@@ -140,21 +162,38 @@ def to_query_parameters_dict(parameters):
140162
"STRUCT-like parameter values are not supported "
141163
"(parameter {}).".format(name)
142164
)
143-
elif array_like(value):
144-
param = array_to_query_parameter(value, name=name)
145165
else:
146-
param = scalar_to_query_parameter(value, name=name)
166+
query_parameter_type = query_parameter_types.get(name)
167+
if array_like(value):
168+
param = array_to_query_parameter(
169+
value, name=name, query_parameter_type=query_parameter_type
170+
)
171+
else:
172+
param = scalar_to_query_parameter(
173+
value, name=name, query_parameter_type=query_parameter_type,
174+
)
175+
147176
result.append(param)
148177

149178
return result
150179

151180

152-
def to_query_parameters(parameters):
181+
def to_query_parameters(parameters, parameter_types):
153182
"""Converts DB-API parameter values into query parameters.
154183
155184
Args:
156185
parameters (Union[Mapping[str, Any], Sequence[Any]]):
157186
A dictionary or sequence of query parameter values.
187+
parameter_types (Union[Mapping[str, str], Sequence[str]]):
188+
A dictionary or list of parameter types.
189+
190+
If parameters is a mapping, then this must be a dictionary
191+
of parameter types. It needn't have a key for each
192+
parameter.
193+
194+
If parameters is a sequence, then this must be a list of
195+
parameter types, one for each paramater. Unknown types
196+
are provided as None.
158197
159198
Returns:
160199
List[google.cloud.bigquery.query._AbstractQueryParameter]:
@@ -164,9 +203,9 @@ def to_query_parameters(parameters):
164203
return []
165204

166205
if isinstance(parameters, collections_abc.Mapping):
167-
return to_query_parameters_dict(parameters)
168-
169-
return to_query_parameters_list(parameters)
206+
return to_query_parameters_dict(parameters, parameter_types)
207+
else:
208+
return to_query_parameters_list(parameters, parameter_types)
170209

171210

172211
def bigquery_scalar_type(value):

google/cloud/bigquery/dbapi/cursor.py

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from collections import abc as collections_abc
1919
import copy
2020
import logging
21+
import re
2122

2223
try:
2324
from google.cloud.bigquery_storage import ArrowSerializationOptions
@@ -161,6 +162,14 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
161162
job_config (google.cloud.bigquery.job.QueryJobConfig):
162163
(Optional) Extra configuration options for the query job.
163164
"""
165+
formatted_operation, parameter_types = _format_operation(operation, parameters)
166+
self._execute(
167+
formatted_operation, parameters, job_id, job_config, parameter_types
168+
)
169+
170+
def _execute(
171+
self, formatted_operation, parameters, job_id, job_config, parameter_types
172+
):
164173
self._query_data = None
165174
self._query_job = None
166175
client = self.connection._client
@@ -169,8 +178,7 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
169178
# query parameters was not one of the standard options. Convert both
170179
# the query and the parameters to the format expected by the client
171180
# libraries.
172-
formatted_operation = _format_operation(operation, parameters=parameters)
173-
query_parameters = _helpers.to_query_parameters(parameters)
181+
query_parameters = _helpers.to_query_parameters(parameters, parameter_types)
174182

175183
if client._default_query_job_config:
176184
if job_config:
@@ -209,8 +217,19 @@ def executemany(self, operation, seq_of_parameters):
209217
seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]):
210218
Sequence of many sets of parameter values.
211219
"""
212-
for parameters in seq_of_parameters:
213-
self.execute(operation, parameters)
220+
if seq_of_parameters:
221+
# There's no reason to format the line more than once, as
222+
# the operation only barely depends on the parameters. So
223+
# we just use the first set of parameters. If there are
224+
# different numbers or types of parameters, we'll error
225+
# anyway.
226+
formatted_operation, parameter_types = _format_operation(
227+
operation, seq_of_parameters[0]
228+
)
229+
for parameters in seq_of_parameters:
230+
self._execute(
231+
formatted_operation, parameters, None, None, parameter_types
232+
)
214233

215234
def _try_fetch(self, size=None):
216235
"""Try to start fetching data, if not yet started.
@@ -427,7 +446,7 @@ def _format_operation_dict(operation, parameters):
427446
raise exceptions.ProgrammingError(exc)
428447

429448

430-
def _format_operation(operation, parameters=None):
449+
def _format_operation(operation, parameters):
431450
"""Formats parameters in operation in way BigQuery expects.
432451
433452
Args:
@@ -445,9 +464,67 @@ def _format_operation(operation, parameters=None):
445464
``parameters`` argument.
446465
"""
447466
if parameters is None or len(parameters) == 0:
448-
return operation.replace("%%", "%") # Still do percent de-escaping.
467+
return operation.replace("%%", "%"), None # Still do percent de-escaping.
468+
469+
operation, parameter_types = _extract_types(operation)
470+
if parameter_types is None:
471+
raise exceptions.ProgrammingError(
472+
f"Parameters were provided, but {repr(operation)} has no placeholders."
473+
)
449474

450475
if isinstance(parameters, collections_abc.Mapping):
451-
return _format_operation_dict(operation, parameters)
476+
return _format_operation_dict(operation, parameters), parameter_types
477+
478+
return _format_operation_list(operation, parameters), parameter_types
479+
480+
481+
def _extract_types(
482+
operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub
483+
):
484+
"""Remove type information from parameter placeholders.
485+
486+
For every parameter of the form %(name:type)s, replace with %(name)s and add the
487+
item name->type to dict that's returned.
488+
489+
Returns operation without type information and a dictionary of names and types.
490+
"""
491+
parameter_types = None
492+
493+
def repl(m):
494+
nonlocal parameter_types
495+
prefix, name, type_ = m.groups()
496+
if len(prefix) % 2:
497+
# The prefix has an odd number of %s, the last of which
498+
# escapes the % we're looking for, so we don't want to
499+
# change anything.
500+
return m.group(0)
501+
502+
try:
503+
if name:
504+
if not parameter_types:
505+
parameter_types = {}
506+
if type_:
507+
if name in parameter_types:
508+
if type_ != parameter_types[name]:
509+
raise exceptions.ProgrammingError(
510+
f"Conflicting types for {name}: "
511+
f"{parameter_types[name]} and {type_}."
512+
)
513+
else:
514+
parameter_types[name] = type_
515+
else:
516+
if not isinstance(parameter_types, dict):
517+
raise TypeError()
518+
519+
return f"{prefix}%({name})s"
520+
else:
521+
if parameter_types is None:
522+
parameter_types = []
523+
parameter_types.append(type_)
524+
return f"{prefix}%s"
525+
except (AttributeError, TypeError):
526+
raise exceptions.ProgrammingError(
527+
f"{repr(operation)} mixes named and unamed parameters."
528+
)
452529

453-
return _format_operation_list(operation, parameters)
530+
return extra_type_sub(repl, operation), parameter_types

0 commit comments

Comments
 (0)
0