diff --git a/CHANGELOG.md b/CHANGELOG.md
index c24725bef..4a089b8b4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,19 @@

 [1]: https://pypi.org/project/google-cloud-bigquery/#history

+## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17)
+
+
+### Features
+
+* Add prefer_bqstorage_client option for Connection ([#1945](https://github.com/googleapis/python-bigquery/issues/1945)) ([bfdeb3f](https://github.com/googleapis/python-bigquery/commit/bfdeb3fdbc1d5b26fcd3d1433abfb0be49d12018))
+* Support load job option ColumnNameCharacterMap ([#1952](https://github.com/googleapis/python-bigquery/issues/1952)) ([7e522ee](https://github.com/googleapis/python-bigquery/commit/7e522eea776cd9a74f8078c4236f63d5ff11f20e))
+
+
+### Bug Fixes
+
+* Do not overwrite page_size with max_results when start_index is set ([#1956](https://github.com/googleapis/python-bigquery/issues/1956)) ([7d0fcee](https://github.com/googleapis/python-bigquery/commit/7d0fceefdf28278c1f2cdaab571de9b235320998))
+
 ## [3.24.0](https://github.com/googleapis/python-bigquery/compare/v3.23.1...v3.24.0) (2024-06-04)

diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py
index 66dee7dfb..a1a69b8fe 100644
--- a/google/cloud/bigquery/dbapi/connection.py
+++ b/google/cloud/bigquery/dbapi/connection.py
@@ -35,12 +35,18 @@ class Connection(object):
             A client that uses the faster BigQuery Storage API to fetch rows from
             BigQuery. If not passed, it is created using the same credentials
             as ``client`` (provided that BigQuery Storage dependencies are installed).
-
-            If both clients are available, ``bqstorage_client`` is used for
-            fetching query results.
+        prefer_bqstorage_client (Optional[bool]):
+            Prefer the BigQuery Storage client over the REST client. If the Storage
+            client isn't available, fall back to the REST client. Defaults to
+            ``True``.
     """

-    def __init__(self, client=None, bqstorage_client=None):
+    def __init__(
+        self,
+        client=None,
+        bqstorage_client=None,
+        prefer_bqstorage_client=True,
+    ):
         if client is None:
             client = bigquery.Client()
             self._owns_client = True
@@ -49,7 +55,10 @@ def __init__(self, client=None, bqstorage_client=None):

         # A warning is already raised by the BQ Storage client factory if
         # instantiation fails, or if the given BQ Storage client instance is outdated.
-        if bqstorage_client is None:
+        if not prefer_bqstorage_client:
+            bqstorage_client = None
+            self._owns_bqstorage_client = False
+        elif bqstorage_client is None:
             bqstorage_client = client._ensure_bqstorage_client()
             self._owns_bqstorage_client = bqstorage_client is not None
         else:
@@ -95,7 +104,7 @@ def cursor(self):
         return new_cursor


-def connect(client=None, bqstorage_client=None):
+def connect(client=None, bqstorage_client=None, prefer_bqstorage_client=True):
     """Construct a DB-API connection to Google BigQuery.

     Args:
@@ -108,11 +117,12 @@
             A client that uses the faster BigQuery Storage API to fetch rows from
             BigQuery. If not passed, it is created using the same credentials
             as ``client`` (provided that BigQuery Storage dependencies are installed).
-
-            If both clients are available, ``bqstorage_client`` is used for
-            fetching query results.
+        prefer_bqstorage_client (Optional[bool]):
+            Prefer the BigQuery Storage client over the REST client. If the Storage
+            client isn't available, fall back to the REST client. Defaults to
+            ``True``.

     Returns:
         google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery.
     """
-    return Connection(client, bqstorage_client)
+    return Connection(client, bqstorage_client, prefer_bqstorage_client)
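Usage sketch (not part of the patch, for context): the new flag is passed straight through `dbapi.connect()`. Credentials and project are assumed to come from the environment's application default credentials.

```python
# Minimal sketch of the new option: force the DB-API layer onto the REST
# client even when the BigQuery Storage extra is installed.
from google.cloud.bigquery import dbapi

# prefer_bqstorage_client=False skips creating a BQ Storage client, so query
# results are fetched through the REST API instead.
connection = dbapi.connect(prefer_bqstorage_client=False)
cursor = connection.cursor()
cursor.execute("SELECT 1 AS x")
print(cursor.fetchall())
connection.close()
```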
""" - return Connection(client, bqstorage_client) + return Connection(client, bqstorage_client, prefer_bqstorage_client) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 176435456..e56ce16f0 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -32,6 +32,26 @@ from google.cloud.bigquery.query import ConnectionProperty +class ColumnNameCharacterMap: + """Indicates the character map used for column names. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap + """ + + COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED" + """Unspecified column name character map.""" + + STRICT = "STRICT" + """Support flexible column name and reject invalid column names.""" + + V1 = "V1" + """ Support alphanumeric + underscore characters and names must start with + a letter or underscore. Invalid column names will be normalized.""" + + V2 = "V2" + """Support flexible column name. Invalid column names will be normalized.""" + + class LoadJobConfig(_JobConfig): """Configuration options for load jobs. @@ -597,6 +617,27 @@ def parquet_options(self, value): else: self._del_sub_prop("parquetOptions") + @property + def column_name_character_map(self) -> str: + """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]: + Character map supported for column names in CSV/Parquet loads. Defaults + to STRICT and can be overridden by Project Config Service. Using this + option with unsupported load formats will result in an error. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map + """ + return self._get_sub_prop( + "columnNameCharacterMap", + ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, + ) + + @column_name_character_map.setter + def column_name_character_map(self, value: Optional[str]): + if value is None: + value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED + self._set_sub_prop("columnNameCharacterMap", value) + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 8049b748e..4ea5687e0 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1532,8 +1532,9 @@ def result( # type: ignore # (incompatible with supertype) # Setting max_results should be equivalent to setting page_size with # regards to allowing the user to tune how many results to download # while we wait for the query to finish. See internal issue: - # 344008814. - if page_size is None and max_results is not None: + # 344008814. But if start_index is set, user is trying to access a + # specific page, so we don't need to set page_size. See issue #1950. + if page_size is None and max_results is not None and start_index is None: page_size = max_results # When timeout has default sentinel value ``object()``, do not pass diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 79c15cf23..fed077e26 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.24.0" +__version__ = "3.25.0" diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index a6c397822..8f0bfaad4 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index b35cc414c..25ed0977b 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.23.1 +google-cloud-bigquery==3.24.0 google-auth-oauthlib==1.2.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 64d436dcf..b35a54a76 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index becaaf50a..2b3e4713e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==23.2.0 -certifi==2024.2.2 +certifi==2024.6.2 cffi===1.15.1; python_version == '3.7' cffi==1.16.0; python_version >= '3.8' charset-normalizer==3.3.2 @@ -14,19 +14,20 @@ geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==0.14.4; python_version >= '3.9' google-api-core==2.19.0 -google-auth==2.29.0 -google-cloud-bigquery==3.23.1 +google-auth==2.30.0 +google-cloud-bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.7.0 -googleapis-common-protos==1.63.0 +google-resumable-media==2.7.1 +googleapis-common-protos==1.63.1 grpcio===1.62.2; python_version == '3.7' -grpcio==1.64.0; python_version >= '3.8' +grpcio==1.64.1; python_version >= '3.8' idna==3.7 munch==4.0.0 mypy-extensions==1.0.0 -packaging==24.0 +packaging===24.0; python_version == '3.7' +packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' @@ -49,7 +50,7 @@ rsa==4.9 Shapely==2.0.4 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.0; python_version >= '3.8' +typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' urllib3==2.2.1; python_version >= '3.8' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index a6c397822..8f0bfaad4 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.2.1; python_version >= '3.8' +pytest==8.2.2; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index e3a225b79..00f0b15d0 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ db-dtypes==1.2.0 -google.cloud.bigquery==3.23.1 +google.cloud.bigquery==3.24.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements-test.txt 
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index 79c15cf23..fed077e26 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "3.24.0"
+__version__ = "3.25.0"
diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt
index a6c397822..8f0bfaad4 100644
--- a/samples/desktopapp/requirements-test.txt
+++ b/samples/desktopapp/requirements-test.txt
@@ -1,4 +1,4 @@
 google-cloud-testutils==1.4.0
 pytest===7.4.4; python_version == '3.7'
-pytest==8.2.1; python_version >= '3.8'
+pytest==8.2.2; python_version >= '3.8'
 mock==5.1.0
diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt
index b35cc414c..25ed0977b 100644
--- a/samples/desktopapp/requirements.txt
+++ b/samples/desktopapp/requirements.txt
@@ -1,2 +1,2 @@
-google-cloud-bigquery==3.23.1
+google-cloud-bigquery==3.24.0
 google-auth-oauthlib==1.2.0
diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt
index 64d436dcf..b35a54a76 100644
--- a/samples/geography/requirements-test.txt
+++ b/samples/geography/requirements-test.txt
@@ -1,3 +1,3 @@
 pytest===7.4.4; python_version == '3.7'
-pytest==8.2.1; python_version >= '3.8'
+pytest==8.2.2; python_version >= '3.8'
 mock==5.1.0
diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index becaaf50a..2b3e4713e 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -1,5 +1,5 @@
 attrs==23.2.0
-certifi==2024.2.2
+certifi==2024.6.2
 cffi===1.15.1; python_version == '3.7'
 cffi==1.16.0; python_version >= '3.8'
 charset-normalizer==3.3.2
@@ -14,19 +14,20 @@ geopandas===0.10.2; python_version == '3.7'
 geopandas===0.13.2; python_version == '3.8'
 geopandas==0.14.4; python_version >= '3.9'
 google-api-core==2.19.0
-google-auth==2.29.0
-google-cloud-bigquery==3.23.1
+google-auth==2.30.0
+google-cloud-bigquery==3.24.0
 google-cloud-bigquery-storage==2.25.0
 google-cloud-core==2.4.1
 google-crc32c==1.5.0
-google-resumable-media==2.7.0
-googleapis-common-protos==1.63.0
+google-resumable-media==2.7.1
+googleapis-common-protos==1.63.1
 grpcio===1.62.2; python_version == '3.7'
-grpcio==1.64.0; python_version >= '3.8'
+grpcio==1.64.1; python_version >= '3.8'
 idna==3.7
 munch==4.0.0
 mypy-extensions==1.0.0
-packaging==24.0
+packaging===24.0; python_version == '3.7'
+packaging==24.1; python_version >= '3.8'
 pandas===1.3.5; python_version == '3.7'
 pandas===2.0.3; python_version == '3.8'
 pandas==2.2.2; python_version >= '3.9'
@@ -49,7 +50,7 @@ rsa==4.9
 Shapely==2.0.4
 six==1.16.0
 typing-extensions===4.7.1; python_version == '3.7'
-typing-extensions==4.12.0; python_version >= '3.8'
+typing-extensions==4.12.2; python_version >= '3.8'
 typing-inspect==0.9.0
 urllib3===1.26.18; python_version == '3.7'
 urllib3==2.2.1; python_version >= '3.8'
diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt
index a6c397822..8f0bfaad4 100644
--- a/samples/magics/requirements-test.txt
+++ b/samples/magics/requirements-test.txt
@@ -1,4 +1,4 @@
 google-cloud-testutils==1.4.0
 pytest===7.4.4; python_version == '3.7'
-pytest==8.2.1; python_version >= '3.8'
+pytest==8.2.2; python_version >= '3.8'
 mock==5.1.0
diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt
index e3a225b79..00f0b15d0 100644
--- a/samples/magics/requirements.txt
+++ b/samples/magics/requirements.txt
@@ -1,5 +1,5 @@
 db-dtypes==1.2.0
-google.cloud.bigquery==3.23.1
+google.cloud.bigquery==3.24.0
 google-cloud-bigquery-storage==2.25.0
 ipython===7.31.1; python_version == '3.7'
 ipython===8.0.1; python_version == '3.8'
diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt
index a6c397822..8f0bfaad4 100644
--- a/samples/notebooks/requirements-test.txt
+++ b/samples/notebooks/requirements-test.txt
@@ -1,4 +1,4 @@
 google-cloud-testutils==1.4.0
 pytest===7.4.4; python_version == '3.7'
-pytest==8.2.1; python_version >= '3.8'
+pytest==8.2.2; python_version >= '3.8'
 mock==5.1.0
diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt
index f774ea183..91a4a87e6 100644
--- a/samples/notebooks/requirements.txt
+++ b/samples/notebooks/requirements.txt
@@ -1,5 +1,5 @@
 db-dtypes==1.2.0
-google-cloud-bigquery==3.23.1
+google-cloud-bigquery==3.24.0
 google-cloud-bigquery-storage==2.25.0
 ipython===7.31.1; python_version == '3.7'
 ipython===8.0.1; python_version == '3.8'
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt
index bd1ba5028..b65023b00 100644
--- a/samples/snippets/requirements-test.txt
+++ b/samples/snippets/requirements-test.txt
@@ -1,5 +1,5 @@
 # samples/snippets should be runnable with no "extras"
 google-cloud-testutils==1.4.0
 pytest===7.4.4; python_version == '3.7'
-pytest==8.2.1; python_version >= '3.8'
+pytest==8.2.2; python_version >= '3.8'
 mock==5.1.0
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 89fe16387..054fa2658 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,2 +1,2 @@
 # samples/snippets should be runnable with no "extras"
-google-cloud-bigquery==3.23.1
+google-cloud-bigquery==3.24.0
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index 862ef3245..95c679a14 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -1788,20 +1788,35 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):
         # in the sorted order.
         expected_data = [
+            [
+                ("by", "pg"),
+                ("id", 1),
+                (
+                    "timestamp",
+                    datetime.datetime(
+                        2006, 10, 9, 18, 21, 51, tzinfo=datetime.timezone.utc
+                    ),
+                ),
+            ],
             [
                 ("by", "phyllis"),
                 ("id", 2),
-                ("timestamp", datetime.datetime(2006, 10, 9, 18, 30, 28, tzinfo=UTC)),
+                (
+                    "timestamp",
+                    datetime.datetime(
+                        2006, 10, 9, 18, 30, 28, tzinfo=datetime.timezone.utc
+                    ),
+                ),
             ],
             [
                 ("by", "phyllis"),
                 ("id", 3),
-                ("timestamp", datetime.datetime(2006, 10, 9, 18, 40, 33, tzinfo=UTC)),
-            ],
-            [
-                ("by", "onebeerdave"),
-                ("id", 4),
-                ("timestamp", datetime.datetime(2006, 10, 9, 18, 47, 42, tzinfo=UTC)),
+                (
+                    "timestamp",
+                    datetime.datetime(
+                        2006, 10, 9, 18, 40, 33, tzinfo=datetime.timezone.utc
+                    ),
+                ),
             ],
         ]
diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py
index e1fa2641f..becf3e959 100644
--- a/tests/unit/job/test_load_config.py
+++ b/tests/unit/job/test_load_config.py
@@ -843,3 +843,42 @@ def test_parquet_options_setter_clearing(self):
         config.parquet_options = None
         self.assertNotIn("parquetOptions", config._properties["load"])
+
+    def test_column_name_character_map_missing(self):
+        from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+        config = self._get_target_class()()
+        self.assertEqual(
+            config.column_name_character_map,
+            ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
+        )
+
+    def test_column_name_character_map_hit(self):
+        from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+        config = self._get_target_class()()
+        config._properties["load"]["columnNameCharacterMap"] = "STRICT"
+        self.assertEqual(
+            config.column_name_character_map,
+            ColumnNameCharacterMap.STRICT,
+        )
+
+    def test_column_name_character_map_setter(self):
+        from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+        config = self._get_target_class()()
+        config.column_name_character_map = "V1"
+        self.assertEqual(
+            config._properties["load"]["columnNameCharacterMap"],
+            ColumnNameCharacterMap.V1,
+        )
+
+    def test_column_name_character_map_none(self):
+        from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+        config = self._get_target_class()()
+        config.column_name_character_map = None
+        self.assertEqual(
+            config._properties["load"]["columnNameCharacterMap"],
+            ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
+        )
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py
index 5b69c98cf..4bbd31c73 100644
--- a/tests/unit/job/test_query.py
+++ b/tests/unit/job/test_query.py
@@ -1652,7 +1652,17 @@ def test_result_with_start_index(self):
         start_index = 1

-        result = job.result(start_index=start_index)
+        # Verifies that page_size isn't overwritten by max_results when
+        # start_index is not None. See
+        # https://github.com/googleapis/python-bigquery/issues/1950
+        page_size = 10
+        max_results = 100
+
+        result = job.result(
+            page_size=page_size,
+            max_results=max_results,
+            start_index=start_index,
+        )

         self.assertIsInstance(result, RowIterator)
         self.assertEqual(result.total_rows, 5)
@@ -1665,6 +1675,9 @@ def test_result_with_start_index(self):
         self.assertEqual(
             tabledata_list_request[1]["query_params"]["startIndex"], start_index
         )
+        self.assertEqual(
+            tabledata_list_request[1]["query_params"]["maxResults"], page_size
+        )

     def test_result_error(self):
         from google.cloud import exceptions
diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py
index 4071e57e0..f5c77c448 100644
--- a/tests/unit/test_dbapi_connection.py
+++ b/tests/unit/test_dbapi_connection.py
@@ -122,6 +122,26 @@ def test_connect_w_both_clients(self):
         self.assertIs(connection._client, mock_client)
         self.assertIs(connection._bqstorage_client, mock_bqstorage_client)

+    def test_connect_prefer_bqstorage_client_false(self):
+        pytest.importorskip("google.cloud.bigquery_storage")
+        from google.cloud.bigquery.dbapi import connect
+        from google.cloud.bigquery.dbapi import Connection
+
+        mock_client = self._mock_client()
+        mock_bqstorage_client = self._mock_bqstorage_client()
+        mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
+
+        connection = connect(
+            client=mock_client,
+            bqstorage_client=mock_bqstorage_client,
+            prefer_bqstorage_client=False,
+        )
+
+        mock_client._ensure_bqstorage_client.assert_not_called()
+        self.assertIsInstance(connection, Connection)
+        self.assertIs(connection._client, mock_client)
+        self.assertIs(connection._bqstorage_client, None)
+
     def test_raises_error_if_closed(self):
         from google.cloud.bigquery.dbapi.exceptions import ProgrammingError
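A quick check mirroring the new unit test (illustrative only; `_bqstorage_client` is a private attribute, not public API): after connecting with `prefer_bqstorage_client=False`, the connection holds no BigQuery Storage client, so every fetch goes through REST.

```python
# Sketch: confirm the REST-only path is active, as the unit test above asserts.
from google.cloud.bigquery import dbapi

connection = dbapi.connect(prefer_bqstorage_client=False)
assert connection._bqstorage_client is None  # private attribute; illustration only
connection.close()
```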