From cf2d3c6729226ba98181864050dc3c8470035505 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Tue, 27 Feb 2024 12:21:15 +0100 Subject: [PATCH 01/37] Fixed regex to parse version in lambda package file (#2767) Co-authored-by: Anton Pirker --- .craft.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.craft.yml b/.craft.yml index 21d4fc7496..70875d5404 100644 --- a/.craft.yml +++ b/.craft.yml @@ -8,7 +8,9 @@ targets: pypi:sentry-sdk: - name: github - name: aws-lambda-layer - includeNames: /^sentry-python-serverless-\d+(\.\d+)*\.zip$/ + # This regex that matches the version is taken from craft: + # https://github.com/getsentry/craft/blob/8d77c38ddbe4be59f98f61b6e42952ca087d3acd/src/utils/version.ts#L11 + includeNames: /^sentry-python-serverless-\bv?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(?:-?([\da-z-]+(?:\.[\da-z-]+)*))?(?:\+([\da-z-]+(?:\.[\da-z-]+)*))?\b.zip$/ layerName: SentryPythonServerlessSDK compatibleRuntimes: - name: python From 69d2be1964e74da5c46d2e20ce2a7ad47564a3e4 Mon Sep 17 00:00:00 2001 From: Ole Date: Tue, 27 Feb 2024 13:03:30 +0100 Subject: [PATCH 02/37] ref(scrubber): Add recursive scrubbing to EventScrubber (#2755) --------- Co-authored-by: Ivana Kellyerova --- sentry_sdk/scrubber.py | 23 ++++++++++++++++++++--- tests/test_scrubber.py | 15 +++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/scrubber.py b/sentry_sdk/scrubber.py index 838ef08b4b..312f042c44 100644 --- a/sentry_sdk/scrubber.py +++ b/sentry_sdk/scrubber.py @@ -59,19 +59,36 @@ class EventScrubber(object): - def __init__(self, denylist=None): - # type: (Optional[List[str]]) -> None + def __init__(self, denylist=None, recursive=False): + # type: (Optional[List[str]], bool) -> None self.denylist = DEFAULT_DENYLIST if denylist is None else denylist self.denylist = [x.lower() for x in self.denylist] + self.recursive = recursive + + def scrub_list(self, lst): + # type: (List[Any]) -> None + if not isinstance(lst, list): + return + + for v in lst: + if isinstance(v, dict): + self.scrub_dict(v) + elif isinstance(v, list): + self.scrub_list(v) def scrub_dict(self, d): # type: (Dict[str, Any]) -> None if not isinstance(d, dict): return - for k in d.keys(): + for k, v in d.items(): if isinstance(k, string_types) and k.lower() in self.denylist: d[k] = AnnotatedValue.substituted_because_contains_sensitive_data() + elif self.recursive: + if isinstance(v, dict): + self.scrub_dict(v) + elif isinstance(v, list): + self.scrub_list(v) def scrub_request(self, event): # type: (Event) -> None diff --git a/tests/test_scrubber.py b/tests/test_scrubber.py index 4b2dfff450..126bf158d8 100644 --- a/tests/test_scrubber.py +++ b/tests/test_scrubber.py @@ -169,3 +169,18 @@ def test_scrubbing_doesnt_affect_local_vars(sentry_init, capture_events): (frame,) = frames assert frame["vars"]["password"] == "[Filtered]" assert password == "cat123" + + +def test_recursive_event_scrubber(sentry_init, capture_events): + sentry_init(event_scrubber=EventScrubber(recursive=True)) + events = capture_events() + complex_structure = { + "deep": { + "deeper": [{"deepest": {"password": "my_darkest_secret"}}], + }, + } + + capture_event({"extra": complex_structure}) + + (event,) = events + assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'" From 877e47ff8356e7d9e305dbad37a2f34ae9fd3db5 Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 27 Feb 2024 15:08:56 +0100 Subject: [PATCH 03/37] docs: Add documentation comment to `scrub_list` (#2769) The new 
comment explains what the method does, allowing developers to more quickly understand the method's purpose. --- sentry_sdk/scrubber.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sentry_sdk/scrubber.py b/sentry_sdk/scrubber.py index 312f042c44..a6c55af4fd 100644 --- a/sentry_sdk/scrubber.py +++ b/sentry_sdk/scrubber.py @@ -67,6 +67,12 @@ def __init__(self, denylist=None, recursive=False): def scrub_list(self, lst): # type: (List[Any]) -> None + """ + If a list is passed to this method, the method recursively searches the list and any + nested lists for any dictionaries. The method calls scrub_dict on all dictionaries + it finds. + If the parameter passed to this method is not a list, the method does nothing. + """ if not isinstance(lst, list): return From f87440749ccda8c7dcf3f0403a6cf9650fedd843 Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Wed, 28 Feb 2024 10:45:23 +0100 Subject: [PATCH 04/37] fix(metrics): Replace invalid tag values with an empty string instead of _ (#2773) --- sentry_sdk/metrics.py | 2 +- tests/test_metrics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/metrics.py b/sentry_sdk/metrics.py index b52e30b6b9..2adb1192a5 100644 --- a/sentry_sdk/metrics.py +++ b/sentry_sdk/metrics.py @@ -55,7 +55,7 @@ _in_metrics = ContextVar("in_metrics", default=False) _sanitize_key = partial(re.compile(r"[^a-zA-Z0-9_/.-]+").sub, "_") -_sanitize_value = partial(re.compile(r"[^\w\d_:/@\.{}\[\]$-]+", re.UNICODE).sub, "_") +_sanitize_value = partial(re.compile(r"[^\w\d_:/@\.{}\[\]$-]+", re.UNICODE).sub, "") _set = set # set is shadowed below GOOD_TRANSACTION_SOURCES = frozenset( diff --git a/tests/test_metrics.py b/tests/test_metrics.py index d3cfd659d1..a57aeda2fa 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -822,7 +822,7 @@ def test_tag_normalization( assert len(m) == 3 assert m[0][4] == { - "foo-bar": "_$foo", + "foo-bar": "$foo", "release": "fun-release@1.0.0", "environment": "not-fun-env", } From e07a128a5ff6e646421ee14bce7b5856d8d6896c Mon Sep 17 00:00:00 2001 From: Francesco Vigliaturo Date: Wed, 28 Feb 2024 15:25:12 +0100 Subject: [PATCH 05/37] fix(docs): allow empty character in metric tags values (#2775) * allow empty char in tags values --- sentry_sdk/metrics.py | 2 +- tests/test_metrics.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/metrics.py b/sentry_sdk/metrics.py index 2adb1192a5..b59cf033ec 100644 --- a/sentry_sdk/metrics.py +++ b/sentry_sdk/metrics.py @@ -55,7 +55,7 @@ _in_metrics = ContextVar("in_metrics", default=False) _sanitize_key = partial(re.compile(r"[^a-zA-Z0-9_/.-]+").sub, "_") -_sanitize_value = partial(re.compile(r"[^\w\d_:/@\.{}\[\]$-]+", re.UNICODE).sub, "") +_sanitize_value = partial(re.compile(r"[^\w\d\s_:/@\.{}\[\]$-]+", re.UNICODE).sub, "") _set = set # set is shadowed below GOOD_TRANSACTION_SOURCES = frozenset( diff --git a/tests/test_metrics.py b/tests/test_metrics.py index a57aeda2fa..1d4a49fcb2 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -811,6 +811,7 @@ def test_tag_normalization( metrics.distribution("a", 1.0, tags={"foo-bar": "%$foo"}, timestamp=ts) metrics.distribution("b", 1.0, tags={"foo$$$bar": "blah{}"}, timestamp=ts) metrics.distribution("c", 1.0, tags={u"foö-bar": u"snöwmän"}, timestamp=ts) + metrics.distribution("d", 1.0, tags={"route": "GET /foo"}, timestamp=ts) # fmt: on Hub.current.flush() @@ -820,7 +821,7 @@ def test_tag_normalization( assert envelope.items[0].headers["type"] == "statsd" m = 
parse_metrics(envelope.items[0].payload.get_bytes()) - assert len(m) == 3 + assert len(m) == 4 assert m[0][4] == { "foo-bar": "$foo", "release": "fun-release@1.0.0", @@ -839,6 +840,11 @@ def test_tag_normalization( "release": "fun-release@1.0.0", "environment": "not-fun-env", } + assert m[3][4] == { + "release": "fun-release@1.0.0", + "environment": "not-fun-env", + "route": "GET /foo", + } # fmt: on From 0901953c93071e858f4da67c1e864766ae19c002 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Thu, 29 Feb 2024 09:36:43 +0100 Subject: [PATCH 06/37] Allow to configure merge target for releases (#2777) --- .github/workflows/release.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 31c0a616f3..f55ec12407 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,6 +9,9 @@ on: force: description: Force a release even when there are release-blockers (optional) required: false + merge_target: + description: Target branch to merge into. Uses the default branch as a fallback (optional) + required: false jobs: release: @@ -26,3 +29,4 @@ jobs: with: version: ${{ github.event.inputs.version }} force: ${{ github.event.inputs.force }} + merge_target: ${{ github.event.inputs.merge_target }} From c5785fb4b6911bfaa1284f33be7dff510edd7a71 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Tue, 5 Mar 2024 12:50:58 +0100 Subject: [PATCH 07/37] feat(transport): Expose `socket_options` (#2786) --- sentry_sdk/client.py | 6 ++++++ sentry_sdk/consts.py | 2 ++ sentry_sdk/transport.py | 14 +++++++++----- tests/test_transport.py | 28 ++++++++++++++++++++-------- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 270d814bfe..64e65a8cb6 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -148,6 +148,12 @@ def _get_options(*args, **kwargs): if rv["event_scrubber"] is None: rv["event_scrubber"] = EventScrubber() + if rv["socket_options"] and not isinstance(rv["socket_options"], list): + logger.warning( + "Ignoring socket_options because of unexpected format. See urllib3.HTTPConnection.socket_options for the expected format." 
+ ) + rv["socket_options"] = None + return rv diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index fe9736938c..c366d04927 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -14,6 +14,7 @@ from typing import Dict from typing import Any from typing import Sequence + from typing import Tuple from typing_extensions import TypedDict from sentry_sdk.integrations import Integration @@ -260,6 +261,7 @@ def __init__( https_proxy=None, # type: Optional[str] ignore_errors=[], # type: Sequence[Union[type, str]] # noqa: B006 max_request_body_size="medium", # type: str + socket_options=None, # type: Optional[List[Tuple[int, int, int | bytes]]] before_send=None, # type: Optional[EventProcessor] before_breadcrumb=None, # type: Optional[BreadcrumbProcessor] debug=None, # type: Optional[bool] diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 8eb00bed12..b924ae502a 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -1,18 +1,17 @@ from __future__ import print_function import io -import urllib3 -import certifi import gzip import time - from datetime import timedelta from collections import defaultdict +import urllib3 +import certifi + from sentry_sdk.utils import Dsn, logger, capture_internal_exceptions, json_dumps from sentry_sdk.worker import BackgroundWorker from sentry_sdk.envelope import Envelope, Item, PayloadRef - from sentry_sdk._compat import datetime_utcnow from sentry_sdk._types import TYPE_CHECKING @@ -441,12 +440,17 @@ def _send_envelope( def _get_pool_options(self, ca_certs): # type: (Optional[Any]) -> Dict[str, Any] - return { + options = { "num_pools": self._num_pools, "cert_reqs": "CERT_REQUIRED", "ca_certs": ca_certs or certifi.where(), } + if self.options["socket_options"]: + options["socket_options"] = self.options["socket_options"] + + return options + def _in_no_proxy(self, parsed_dsn): # type: (Dsn) -> bool no_proxy = getproxies().get("no") diff --git a/tests/test_transport.py b/tests/test_transport.py index 71c47e04fc..aa471b9081 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -3,14 +3,13 @@ import pickle import gzip import io - +import socket +from collections import namedtuple from datetime import datetime, timedelta import pytest -from collections import namedtuple -from werkzeug.wrappers import Request, Response - from pytest_localserver.http import WSGIServer +from werkzeug.wrappers import Request, Response from sentry_sdk import Hub, Client, add_breadcrumb, capture_message, Scope from sentry_sdk._compat import datetime_utcnow @@ -155,6 +154,19 @@ def test_transport_num_pools(make_client, num_pools, expected_num_pools): assert options["num_pools"] == expected_num_pools +def test_socket_options(make_client): + socket_options = [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10), + (socket.SOL_TCP, socket.TCP_KEEPCNT, 6), + ] + + client = make_client(socket_options=socket_options) + + options = client.transport._get_pool_options([]) + assert options["socket_options"] == socket_options + + def test_transport_infinite_loop(capturing_server, request, make_client): client = make_client( debug=True, @@ -219,7 +231,7 @@ def test_parse_rate_limits(input, expected): assert dict(_parse_rate_limits(input, now=NOW)) == expected -def test_simple_rate_limits(capturing_server, capsys, caplog, make_client): +def test_simple_rate_limits(capturing_server, make_client): client = make_client() capturing_server.respond_with(code=429, headers={"Retry-After": "4"}) @@ -241,7 +253,7 
@@ def test_simple_rate_limits(capturing_server, capsys, caplog, make_client): @pytest.mark.parametrize("response_code", [200, 429]) def test_data_category_limits( - capturing_server, capsys, caplog, response_code, make_client, monkeypatch + capturing_server, response_code, make_client, monkeypatch ): client = make_client(send_client_reports=False) @@ -288,7 +300,7 @@ def record_lost_event(reason, data_category=None, item=None): @pytest.mark.parametrize("response_code", [200, 429]) def test_data_category_limits_reporting( - capturing_server, capsys, caplog, response_code, make_client, monkeypatch + capturing_server, response_code, make_client, monkeypatch ): client = make_client(send_client_reports=True) @@ -371,7 +383,7 @@ def intercepting_fetch(*args, **kwargs): @pytest.mark.parametrize("response_code", [200, 429]) def test_complex_limits_without_data_category( - capturing_server, capsys, caplog, response_code, make_client + capturing_server, response_code, make_client ): client = make_client() capturing_server.respond_with( From 22dd50ca63a355e4f91429a5d93e41de4267207b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Mar 2024 14:51:29 +0100 Subject: [PATCH 08/37] build(deps): bump checkouts/data-schemas from `eb941c2` to `ed078ed` (#2781) Bumps [checkouts/data-schemas](https://github.com/getsentry/sentry-data-schemas) from `eb941c2` to `ed078ed`. - [Commits](https://github.com/getsentry/sentry-data-schemas/compare/eb941c2dcbcff9bc04f35ce7f1837de118f790fe...ed078ed0bb09b9a5d0f387eaf70e449a5ae51cfd) --- updated-dependencies: - dependency-name: checkouts/data-schemas dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ivana Kellyerova --- checkouts/data-schemas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkouts/data-schemas b/checkouts/data-schemas index eb941c2dcb..ed078ed0bb 160000 --- a/checkouts/data-schemas +++ b/checkouts/data-schemas @@ -1 +1 @@ -Subproject commit eb941c2dcbcff9bc04f35ce7f1837de118f790fe +Subproject commit ed078ed0bb09b9a5d0f387eaf70e449a5ae51cfd From b96f03d6b6ca4d23a06a7e927ea8c5c7723ce751 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Mar 2024 14:10:58 +0000 Subject: [PATCH 09/37] build(deps): bump types-protobuf from 4.24.0.20240129 to 4.24.0.20240302 (#2782) Bumps [types-protobuf](https://github.com/python/typeshed) from 4.24.0.20240129 to 4.24.0.20240302. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-protobuf dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ivana Kellyerova --- linter-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linter-requirements.txt b/linter-requirements.txt index 5fec1f22c4..42a0313e31 100644 --- a/linter-requirements.txt +++ b/linter-requirements.txt @@ -2,7 +2,7 @@ mypy black flake8==5.0.4 # flake8 depends on pyflakes>=3.0.0 and this dropped support for Python 2 "# type:" comments types-certifi -types-protobuf==4.24.0.20240129 # newer raises an error on mypy sentry_sdk +types-protobuf==4.24.0.20240302 # newer raises an error on mypy sentry_sdk types-redis types-setuptools pymongo # There is no separate types module. From d62dc906ef2848d25fdd7937db8367b0191ec107 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 7 Mar 2024 09:36:20 +0100 Subject: [PATCH 10/37] Removed print statements because it messes with the tests (#2789) --- tests/integrations/aws_lambda/client.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/integrations/aws_lambda/client.py b/tests/integrations/aws_lambda/client.py index 265ce6a520..298ebd920d 100644 --- a/tests/integrations/aws_lambda/client.py +++ b/tests/integrations/aws_lambda/client.py @@ -240,7 +240,7 @@ def run_lambda_function( FunctionName=full_fn_name, ) print( - f"Lambda function {full_fn_name} in AWS already existing, taking it (and do not create a local one)" + "Lambda function in AWS already existing, taking it (and do not create a local one)" ) except client.exceptions.ResourceNotFoundException: function_exists_in_aws = False @@ -251,14 +251,9 @@ def run_lambda_function( dir_already_existing = os.path.isdir(base_dir) if dir_already_existing: - print( - f"Local Lambda function directory ({base_dir}) already exists, skipping creation" - ) + print("Local Lambda function directory already exists, skipping creation") if not dir_already_existing: - print( - f"Creating Lambda function package ({full_fn_name}) locally in directory {base_dir}" - ) os.mkdir(base_dir) _create_lambda_package( base_dir, code, initial_handler, layer, syntax_check, subprocess_kwargs @@ -321,10 +316,9 @@ def clean_up(): waiter = client.get_waiter("function_active_v2") waiter.wait(FunctionName=full_fn_name) - print(f"Created Lambda function in AWS: {full_fn_name}") except client.exceptions.ResourceConflictException: print( - f"Lambda function ({full_fn_name}) already existing in AWS, this is fine, we will just invoke it." + "Lambda function already exists, this is fine, we will just invoke it." 
) response = client.invoke( From 8f1a125818dbca05a8d76a558ce35f51465b12e9 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Thu, 7 Mar 2024 14:25:01 +0100 Subject: [PATCH 11/37] ref(awslambda): xfail broken tests for now (#2794) --- tests/integrations/aws_lambda/test_aws.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/integrations/aws_lambda/test_aws.py b/tests/integrations/aws_lambda/test_aws.py index 6f51ad14da..bea87adce5 100644 --- a/tests/integrations/aws_lambda/test_aws.py +++ b/tests/integrations/aws_lambda/test_aws.py @@ -661,6 +661,9 @@ def test_handler(event, context): assert response["Payload"]["AssertionError raised"] is False +@pytest.mark.xfail( + reason="The limited log output we depend on is being clogged by a new warning" +) def test_serverless_no_code_instrumentation(run_lambda_function): """ Test that ensures that just by adding a lambda layer containing the @@ -705,6 +708,9 @@ def test_handler(event, context): assert "sentry_handler" in response["LogResult"][3].decode("utf-8") +@pytest.mark.xfail( + reason="The limited log output we depend on is being clogged by a new warning" +) def test_error_has_new_trace_context_performance_enabled(run_lambda_function): envelopes, _, _ = run_lambda_function( LAMBDA_PRELUDE @@ -767,6 +773,9 @@ def test_handler(event, context): ) +@pytest.mark.xfail( + reason="The limited log output we depend on is being clogged by a new warning" +) def test_error_has_existing_trace_context_performance_enabled(run_lambda_function): trace_id = "471a43a4192642f0b136d5159a501701" parent_span_id = "6e8f22c393e68f19" From fc7061113a7f9b1b7804336fce0be951df4ddee7 Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Thu, 7 Mar 2024 13:33:39 +0000 Subject: [PATCH 12/37] release: 1.41.0 --- CHANGELOG.md | 16 ++++++++++++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a57fb34b8..7d0ada9ece 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## 1.41.0 + +### Various fixes & improvements + +- ref(awslambda): xfail broken tests for now (#2794) by @sentrivana +- Removed print statements because it messes with the tests (#2789) by @antonpirker +- build(deps): bump types-protobuf from 4.24.0.20240129 to 4.24.0.20240302 (#2782) by @dependabot +- build(deps): bump checkouts/data-schemas from `eb941c2` to `ed078ed` (#2781) by @dependabot +- feat(transport): Expose `socket_options` (#2786) by @sentrivana +- Allow to configure merge target for releases (#2777) by @sentrivana +- fix(docs): allow empty character in metric tags values (#2775) by @viglia +- fix(metrics): Replace invalid tag values with an empty string instead of _ (#2773) by @markushi +- docs: Add documentation comment to `scrub_list` (#2769) by @szokeasaurusrex +- ref(scrubber): Add recursive scrubbing to EventScrubber (#2755) by @Cheapshot003 +- Fixed regex to parse version in lambda package file (#2767) by @sentrivana + ## 1.40.6 ### Various fixes & improvements diff --git a/docs/conf.py b/docs/conf.py index 9a9f3fb56a..8a53738e61 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "1.40.6" +release = "1.41.0" version = ".".join(release.split(".")[:2]) # The short X.Y version. 
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index c366d04927..2b58aecc24 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -318,4 +318,4 @@ def _get_default_options(): del _get_default_options -VERSION = "1.40.6" +VERSION = "1.41.0" diff --git a/setup.py b/setup.py index ef268c49c9..0af275d6af 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_file_text(file_name): setup( name="sentry-sdk", - version="1.40.6", + version="1.41.0", author="Sentry Team and Contributors", author_email="hello@sentry.io", url="https://github.com/getsentry/sentry-python", From df9841ed269ce55f14d4c68e1bf05cd7fb89b822 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Thu, 7 Mar 2024 14:35:56 +0100 Subject: [PATCH 13/37] Update CHANGELOG.md --- CHANGELOG.md | 56 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d0ada9ece..cef63eab1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,17 +4,53 @@ ### Various fixes & improvements -- ref(awslambda): xfail broken tests for now (#2794) by @sentrivana -- Removed print statements because it messes with the tests (#2789) by @antonpirker -- build(deps): bump types-protobuf from 4.24.0.20240129 to 4.24.0.20240302 (#2782) by @dependabot -- build(deps): bump checkouts/data-schemas from `eb941c2` to `ed078ed` (#2781) by @dependabot -- feat(transport): Expose `socket_options` (#2786) by @sentrivana +- Add recursive scrubbing to `EventScrubber` (#2755) by @Cheapshot003 + + By default, the `EventScrubber` will not search your events for potential + PII recursively. With this release, you can enable this behavior with: + + ```python + import sentry_sdk + from sentry_sdk.scrubber import EventScrubber + + sentry_sdk.init( + # ...your usual settings... + event_scrubber=EventScrubber(recursive=True), + ) + ``` + +- Expose `socket_options` (#2786) by @sentrivana + + If the SDK is experiencing connection issues (connection resets, server + closing connection without response, etc.) while sending events to Sentry, + tweaking the default `urllib3` socket options to the following can help: + + ```python + import socket + from urllib3.connection import HTTPConnection + import sentry_sdk + + sentry_sdk.init( + # ...your usual settings... 
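+      # Note: socket_options replaces urllib3's default option list wholesale,
+      # which is why the defaults are re-added explicitly before the keep-alive tuning.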
+ socket_options=HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + # note: skip the following line if you're on MacOS since TCP_KEEPIDLE doesn't exist there + (socket.SOL_TCP, socket.TCP_KEEPIDLE, 45), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10), + (socket.SOL_TCP, socket.TCP_KEEPCNT, 6), + ], + ) + ``` + - Allow to configure merge target for releases (#2777) by @sentrivana -- fix(docs): allow empty character in metric tags values (#2775) by @viglia -- fix(metrics): Replace invalid tag values with an empty string instead of _ (#2773) by @markushi -- docs: Add documentation comment to `scrub_list` (#2769) by @szokeasaurusrex -- ref(scrubber): Add recursive scrubbing to EventScrubber (#2755) by @Cheapshot003 -- Fixed regex to parse version in lambda package file (#2767) by @sentrivana +- Allow empty character in metric tags values (#2775) by @viglia +- Replace invalid tag values with an empty string instead of _ (#2773) by @markushi +- Add documentation comment to `scrub_list` (#2769) by @szokeasaurusrex +- Fixed regex to parse version in lambda package file (#2767) by @antonpirker +- xfail broken AWS Lambda tests for now (#2794) by @sentrivana +- Removed print statements because it messes with the tests (#2789) by @antonpirker +- Bump `types-protobuf` from 4.24.0.20240129 to 4.24.0.20240302 (#2782) by @dependabot +- Bump `checkouts/data-schemas` from `eb941c2` to `ed078ed` (#2781) by @dependabot ## 1.40.6 From 461bd59cf159cd780010d7c45e8f0aa6dd873f3c Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Mon, 11 Mar 2024 10:52:30 +0100 Subject: [PATCH 14/37] ref: Improve scrub_dict typing (#2768) This change improves the typing of the scrub_dict method. Previously, the scrub_dict method's type hints indicated that only dict[str, Any] was accepted as the parameter. However, the method is actually implemented to accept any object, since it checks the types of the parameters at runtime. Therefore, object is a more appropriate type hint for the parameter. #2753 depends on this change for mypy to pass --- sentry_sdk/scrubber.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/scrubber.py b/sentry_sdk/scrubber.py index a6c55af4fd..3f089ab8f6 100644 --- a/sentry_sdk/scrubber.py +++ b/sentry_sdk/scrubber.py @@ -1,3 +1,8 @@ +try: + from typing import cast +except ImportError: + cast = lambda _, obj: obj + from sentry_sdk.utils import ( capture_internal_exceptions, AnnotatedValue, @@ -8,8 +13,6 @@ if TYPE_CHECKING: from sentry_sdk._types import Event - from typing import Any - from typing import Dict from typing import List from typing import Optional @@ -66,7 +69,7 @@ def __init__(self, denylist=None, recursive=False): self.recursive = recursive def scrub_list(self, lst): - # type: (List[Any]) -> None + # type: (object) -> None """ If a list is passed to this method, the method recursively searches the list and any nested lists for any dictionaries. The method calls scrub_dict on all dictionaries @@ -77,24 +80,28 @@ def scrub_list(self, lst): return for v in lst: - if isinstance(v, dict): - self.scrub_dict(v) - elif isinstance(v, list): - self.scrub_list(v) + self.scrub_dict(v) # no-op unless v is a dict + self.scrub_list(v) # no-op unless v is a list def scrub_dict(self, d): - # type: (Dict[str, Any]) -> None + # type: (object) -> None + """ + If a dictionary is passed to this method, the method scrubs the dictionary of any + sensitive data. 
The method calls itself recursively on any nested dictionaries ( + including dictionaries nested in lists) if self.recursive is True. + This method does nothing if the parameter passed to it is not a dictionary. + """ if not isinstance(d, dict): return for k, v in d.items(): - if isinstance(k, string_types) and k.lower() in self.denylist: + # The cast is needed because mypy is not smart enough to figure out that k must be a + # string after the isinstance check. + if isinstance(k, string_types) and cast(str, k).lower() in self.denylist: d[k] = AnnotatedValue.substituted_because_contains_sensitive_data() elif self.recursive: - if isinstance(v, dict): - self.scrub_dict(v) - elif isinstance(v, list): - self.scrub_list(v) + self.scrub_dict(v) # no-op unless v is a dict + self.scrub_list(v) # no-op unless v is a list def scrub_request(self, event): # type: (Event) -> None From 46a632d10a382312707bd4af2d016934b202e129 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Mon, 11 Mar 2024 14:23:53 +0100 Subject: [PATCH 15/37] Propagate sentry-trace and baggage to huey tasks (#2792) This PR enables passing `sentry-trace` and `baggage` headers to background tasks using the Huey task queue. This allows easily correlating what happens inside a background task with whatever transaction (e.g. a user request in a Django application) queued the task in the first place. Periodic tasks do not get these headers, because otherwise each execution of the periodic task would be tied to the same parent trace (the long-running worker process). --- Co-authored-by: Anton Pirker --- sentry_sdk/integrations/huey.py | 24 ++++++++++++++++++++---- tests/integrations/huey/test_huey.py | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/huey.py b/sentry_sdk/integrations/huey.py index 9641160099..43c03936b1 100644 --- a/sentry_sdk/integrations/huey.py +++ b/sentry_sdk/integrations/huey.py @@ -6,10 +6,15 @@ from sentry_sdk._compat import reraise from sentry_sdk._types import TYPE_CHECKING from sentry_sdk import Hub +from sentry_sdk.api import continue_trace, get_baggage, get_traceparent from sentry_sdk.consts import OP from sentry_sdk.hub import _should_send_default_pii from sentry_sdk.integrations import DidNotEnable, Integration -from sentry_sdk.tracing import Transaction, TRANSACTION_SOURCE_TASK +from sentry_sdk.tracing import ( + BAGGAGE_HEADER_NAME, + SENTRY_TRACE_HEADER_NAME, + TRANSACTION_SOURCE_TASK, +) from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -25,7 +30,7 @@ F = TypeVar("F", bound=Callable[..., Any]) try: - from huey.api import Huey, Result, ResultGroup, Task + from huey.api import Huey, Result, ResultGroup, Task, PeriodicTask from huey.exceptions import CancelExecution, RetryTask, TaskLockedException except ImportError: raise DidNotEnable("Huey is not installed") @@ -56,6 +61,14 @@ def _sentry_enqueue(self, task): return old_enqueue(self, task) with hub.start_span(op=OP.QUEUE_SUBMIT_HUEY, description=task.name): + if not isinstance(task, PeriodicTask): + # Attach trace propagation data to task kwargs. We do + # not do this for periodic tasks, as these don't + # really have an originating transaction. 
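+                # These headers are popped off task.kwargs again in
+                # _sentry_execute() below and fed into continue_trace(),
+                # so the task function itself never sees the extra kwarg.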
+ task.kwargs["sentry_headers"] = { + BAGGAGE_HEADER_NAME: get_baggage(), + SENTRY_TRACE_HEADER_NAME: get_traceparent(), + } return old_enqueue(self, task) Huey.enqueue = _sentry_enqueue @@ -145,12 +158,15 @@ def _sentry_execute(self, task, timestamp=None): scope.clear_breadcrumbs() scope.add_event_processor(_make_event_processor(task)) - transaction = Transaction( + sentry_headers = task.kwargs.pop("sentry_headers", None) + + transaction = continue_trace( + sentry_headers or {}, name=task.name, - status="ok", op=OP.QUEUE_TASK_HUEY, source=TRANSACTION_SOURCE_TASK, ) + transaction.set_status("ok") if not getattr(task, "_sentry_is_patched", False): task.execute = _wrap_task_execute(task.execute) diff --git a/tests/integrations/huey/test_huey.py b/tests/integrations/huey/test_huey.py index 0bebd91b19..48a3da97f4 100644 --- a/tests/integrations/huey/test_huey.py +++ b/tests/integrations/huey/test_huey.py @@ -172,3 +172,21 @@ def dummy_task(): assert len(event["spans"]) assert event["spans"][0]["op"] == "queue.submit.huey" assert event["spans"][0]["description"] == "different_task_name" + + +def test_huey_propagate_trace(init_huey, capture_events): + huey = init_huey() + + events = capture_events() + + @huey.task() + def propagated_trace_task(): + pass + + with start_transaction() as outer_transaction: + execute_huey_task(huey, propagated_trace_task) + + assert ( + events[0]["transaction"] == "propagated_trace_task" + ) # the "inner" transaction + assert events[0]["contexts"]["trace"]["trace_id"] == outer_transaction.trace_id From ff0a94b5f1c1eb5063f99aca8b9e267e86a6a177 Mon Sep 17 00:00:00 2001 From: colin-sentry <161344340+colin-sentry@users.noreply.github.com> Date: Mon, 11 Mar 2024 10:06:02 -0400 Subject: [PATCH 16/37] OpenAI integration (#2791) * OpenAI integration * Fix linting errors * Fix CI * Fix lint * Fix more CI issues * Run tests on version pinned OpenAI too * Fix pydantic issue in test * Import type in TYPE_CHECKING gate * PR feedback fixes * Fix tiktoken test variant * PII gate the request and response * Rename set_data tags * Move doc location * Add "exclude prompts" flag as optional * Change prompts to be excluded by default * Set flag in tests * Fix tiktoken tox.ini extra dash * Change strip PII semantics * More test coverage for PII * notiktoken --------- Co-authored-by: Anton Pirker --- .../test-integrations-data-processing.yml | 14 +- mypy.ini | 2 + .../split-tox-gh-actions.py | 1 + sentry_sdk/consts.py | 2 + sentry_sdk/integrations/__init__.py | 1 + sentry_sdk/integrations/openai.py | 279 ++++++++++++++++++ setup.py | 1 + tests/integrations/openai/__init__.py | 3 + tests/integrations/openai/test_openai.py | 231 +++++++++++++++ tox.ini | 13 + 10 files changed, 546 insertions(+), 1 deletion(-) create mode 100644 sentry_sdk/integrations/openai.py create mode 100644 tests/integrations/openai/__init__.py create mode 100644 tests/integrations/openai/test_openai.py diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index ddac93d1e5..c40d45845d 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.5","3.7","3.8","3.11","3.12"] + python-version: ["3.5","3.7","3.8","3.9","3.11","3.12"] # python3.6 reached EOL and is no longer being supported on # new versions of hosted runners on Github Actions # ubuntu-20.04 is the last version that supported python3.6 
@@ -58,6 +58,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-huey-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq latest run: | set -x # print commands that are executed @@ -110,6 +114,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq pinned run: | set -x # print commands that are executed @@ -151,6 +159,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py2.7-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai py27 + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py2.7-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq py27 run: | set -x # print commands that are executed diff --git a/mypy.ini b/mypy.ini index fef90c867e..c1444d61e5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -67,6 +67,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-huey.*] ignore_missing_imports = True +[mypy-openai.*] +ignore_missing_imports = True [mypy-arq.*] ignore_missing_imports = True [mypy-grpc.*] diff --git a/scripts/split-tox-gh-actions/split-tox-gh-actions.py b/scripts/split-tox-gh-actions/split-tox-gh-actions.py index f8beffc219..13b81283ca 100755 --- a/scripts/split-tox-gh-actions/split-tox-gh-actions.py +++ b/scripts/split-tox-gh-actions/split-tox-gh-actions.py @@ -70,6 +70,7 @@ "beam", "celery", "huey", + "openai", "rq", ], "Databases": [ diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 2b58aecc24..e4edfddef1 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -219,6 +219,8 @@ class OP: MIDDLEWARE_STARLITE = "middleware.starlite" MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive" MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" + OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" + OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" QUEUE_SUBMIT_CELERY = "queue.submit.celery" diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 21f7188ff1..c9737ae589 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "sentry_sdk.integrations.fastapi.FastApiIntegration", "sentry_sdk.integrations.flask.FlaskIntegration", "sentry_sdk.integrations.httpx.HttpxIntegration", + "sentry_sdk.integrations.openai.OpenAIIntegration", "sentry_sdk.integrations.pyramid.PyramidIntegration", "sentry_sdk.integrations.redis.RedisIntegration", "sentry_sdk.integrations.rq.RqIntegration", diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py new file mode 100644 index 0000000000..5c05a43916 --- /dev/null +++ b/sentry_sdk/integrations/openai.py @@ -0,0 +1,279 @@ +from sentry_sdk import consts 
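+# This integration works by monkeypatching Completions.create and
+# Embeddings.create (see OpenAIIntegration.setup_once below) with wrappers
+# that record spans, token usage and, when sending PII is allowed,
+# the prompts and responses themselves.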
+from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Iterable, List, Optional, Callable, Iterator + from sentry_sdk.tracing import Span + +import sentry_sdk +from sentry_sdk._functools import wraps +from sentry_sdk.hub import Hub, _should_send_default_pii +from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception + +try: + from openai.resources.chat.completions import Completions + from openai.resources import Embeddings + + if TYPE_CHECKING: + from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk +except ImportError: + raise DidNotEnable("OpenAI not installed") + +try: + import tiktoken # type: ignore + + enc = tiktoken.get_encoding("cl100k_base") + + def count_tokens(s): + # type: (str) -> int + return len(enc.encode_ordinary(s)) + + logger.debug("[OpenAI] using tiktoken to count tokens") +except ImportError: + logger.info( + "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs" + "Please install 'tiktoken' if you aren't receiving token usage in Sentry." + "See https://docs.sentry.io/platforms/python/integrations/openai/ for more information." + ) + + def count_tokens(s): + # type: (str) -> int + return 0 + + +COMPLETION_TOKENS_USED = "ai.completion_tоkens.used" +PROMPT_TOKENS_USED = "ai.prompt_tоkens.used" +TOTAL_TOKENS_USED = "ai.total_tоkens.used" + + +class OpenAIIntegration(Integration): + identifier = "openai" + + def __init__(self, include_prompts=True): + # type: (OpenAIIntegration, bool) -> None + self.include_prompts = include_prompts + + @staticmethod + def setup_once(): + # type: () -> None + Completions.create = _wrap_chat_completion_create(Completions.create) + Embeddings.create = _wrap_embeddings_create(Embeddings.create) + + +def _capture_exception(hub, exc): + # type: (Hub, Any) -> None + + if hub.client is not None: + event, hint = event_from_exception( + exc, + client_options=hub.client.options, + mechanism={"type": "openai", "handled": False}, + ) + hub.capture_event(event, hint=hint) + + +def _calculate_chat_completion_usage( + messages, response, span, streaming_message_responses=None +): + # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]]) -> None + completion_tokens = 0 + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "completion_tokens") and isinstance( + response.usage.completion_tokens, int + ): + completion_tokens = response.usage.completion_tokens + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + for message in messages: + if "content" in message: + prompt_tokens += count_tokens(message["content"]) + + if completion_tokens == 0: + if streaming_message_responses is not None: + for message in streaming_message_responses: + completion_tokens += count_tokens(message) + elif hasattr(response, "choices"): + for choice in response.choices: + if hasattr(choice, "message"): + completion_tokens += count_tokens(choice.message) + + if total_tokens == 0: + total_tokens = prompt_tokens + completion_tokens + + if completion_tokens != 0: + span.set_data(COMPLETION_TOKENS_USED, completion_tokens) + if prompt_tokens != 0: + 
span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + if total_tokens != 0: + span.set_data(TOTAL_TOKENS_USED, total_tokens) + + +def _wrap_chat_completion_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + @wraps(f) + def new_chat_completion(*args, **kwargs): + # type: (*Any, **Any) -> Any + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + kwargs["messages"] = list(kwargs["messages"]) + messages = kwargs["messages"] + model = kwargs.get("model") + streaming = kwargs.get("stream") + + span = sentry_sdk.start_span( + op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion" + ) + span.__enter__() + try: + res = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + span.__exit__(None, None, None) + raise e from None + + with capture_internal_exceptions(): + if _should_send_default_pii() and integration.include_prompts: + span.set_data("ai.input_messages", messages) + span.set_data("ai.model_id", model) + span.set_data("ai.streaming", streaming) + + if hasattr(res, "choices"): + if _should_send_default_pii() and integration.include_prompts: + span.set_data( + "ai.responses", list(map(lambda x: x.message, res.choices)) + ) + _calculate_chat_completion_usage(messages, res, span) + span.__exit__(None, None, None) + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator = res._iterator # type: Iterator[ChatCompletionChunk] + + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list( + map(lambda chunk: "".join(chunk), data_buf) + ) + if ( + _should_send_default_pii() + and integration.include_prompts + ): + span.set_data("ai.responses", all_responses) + _calculate_chat_completion_usage( + messages, res, span, all_responses + ) + span.__exit__(None, None, None) + + res._iterator = new_iterator() + else: + span.set_data("unknown_response", True) + span.__exit__(None, None, None) + return res + + return new_chat_completion + + +def _wrap_embeddings_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + + @wraps(f) + def new_embeddings_create(*args, **kwargs): + # type: (*Any, **Any) -> Any + + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + with sentry_sdk.start_span( + op=consts.OP.OPENAI_EMBEDDINGS_CREATE, + description="OpenAI Embedding Creation", + ) as span: + if "input" in kwargs and ( + _should_send_default_pii() and integration.include_prompts + ): + if isinstance(kwargs["input"], str): + span.set_data("ai.input_messages", [kwargs["input"]]) + elif ( + isinstance(kwargs["input"], list) + 
and len(kwargs["input"]) > 0 + and isinstance(kwargs["input"][0], str) + ): + span.set_data("ai.input_messages", kwargs["input"]) + if "model" in kwargs: + span.set_data("ai.model_id", kwargs["model"]) + try: + response = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + raise e from None + + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + prompt_tokens = count_tokens(kwargs["input"] or "") + + if total_tokens == 0: + total_tokens = prompt_tokens + + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + span.set_data(TOTAL_TOKENS_USED, total_tokens) + + return response + + return new_embeddings_create diff --git a/setup.py b/setup.py index 0af275d6af..0299bf91fb 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def get_file_text(file_name): "httpx": ["httpx>=0.16.0"], "huey": ["huey>=2"], "loguru": ["loguru>=0.5"], + "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"], "opentelemetry": ["opentelemetry-distro>=0.35b0"], "opentelemetry-experimental": [ "opentelemetry-distro~=0.40b0", diff --git a/tests/integrations/openai/__init__.py b/tests/integrations/openai/__init__.py new file mode 100644 index 0000000000..d6cc3d5505 --- /dev/null +++ b/tests/integrations/openai/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("openai") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py new file mode 100644 index 0000000000..ecdedd2694 --- /dev/null +++ b/tests/integrations/openai/test_openai.py @@ -0,0 +1,231 @@ +import pytest +from openai import OpenAI, Stream, OpenAIError +from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding +from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk +from openai.types.chat.chat_completion import Choice +from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice +from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage + +from sentry_sdk import start_transaction +from sentry_sdk.integrations.openai import ( + OpenAIIntegration, + COMPLETION_TOKENS_USED, + PROMPT_TOKENS_USED, + TOTAL_TOKENS_USED, +) + +from unittest import mock # python 3.3 and above + + +EXAMPLE_CHAT_COMPLETION = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="the model response" + ), + ) + ], + created=10000000, + model="model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), +) + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + response = ( + 
client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "the model response" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + assert span["data"][COMPLETION_TOKENS_USED] == 10 + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 + + +# noinspection PyTypeChecker +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=None) + returned_stream._iterator = [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "hello world" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"][COMPLETION_TOKENS_USED] == 2 + assert span["data"][PROMPT_TOKENS_USED] == 1 + assert span["data"][TOTAL_TOKENS_USED] == 3 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": 
"hello"}] + ) + + (event,) = events + assert event["level"] == "error" + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_embeddings_create( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + + returned_embedding = CreateEmbeddingResponse( + data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], + model="some-model", + object="list", + usage=EmbeddingTokenUsage( + prompt_tokens=20, + total_tokens=30, + ), + ) + + client.embeddings._post = mock.Mock(return_value=returned_embedding) + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.embeddings.create.openai" + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0] + else: + assert "ai.input_messages" not in span["data"] + + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 diff --git a/tox.ini b/tox.ini index a23251f186..1e7ba06a00 100644 --- a/tox.ini +++ b/tox.ini @@ -146,6 +146,11 @@ envlist = {py3.5,py3.11,py3.12}-loguru-v{0.5} {py3.5,py3.11,py3.12}-loguru-latest + # OpenAI + {py3.9,py3.11,py3.12}-openai-v1 + {py3.9,py3.11,py3.12}-openai-latest + {py3.9,py3.11,py3.12}-openai-notiktoken + # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry @@ -439,6 +444,13 @@ deps = loguru-v0.5: loguru~=0.5.0 loguru-latest: loguru + # OpenAI + openai-v1: openai~=1.0.0 + openai-v1: tiktoken~=0.6.0 + openai-latest: openai + openai-latest: tiktoken~=0.6.0 + openai-notiktoken: openai + # OpenTelemetry (OTel) opentelemetry: opentelemetry-distro @@ -597,6 +609,7 @@ setenv = httpx: TESTPATH=tests/integrations/httpx huey: TESTPATH=tests/integrations/huey loguru: TESTPATH=tests/integrations/loguru + openai: TESTPATH=tests/integrations/openai opentelemetry: TESTPATH=tests/integrations/opentelemetry pure_eval: TESTPATH=tests/integrations/pure_eval pymongo: TESTPATH=tests/integrations/pymongo From f40e27f16ef4285563a52f1889808e669126a381 Mon Sep 17 00:00:00 2001 From: colin-sentry <161344340+colin-sentry@users.noreply.github.com> Date: Tue, 12 Mar 2024 07:13:16 -0400 Subject: [PATCH 17/37] Add a method for normalizing data passed to set_data (#2800) --- sentry_sdk/integrations/openai.py | 55 +++++++++++++++++------- tests/integrations/openai/test_openai.py | 2 +- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 5c05a43916..0e71029b60 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -73,6 +73,28 @@ def _capture_exception(hub, exc): hub.capture_event(event, hint=hint) +def _normalize_data(data): + # type: (Any) -> Any + + # convert pydantic data (e.g. 
OpenAI v1+) to json compatible format + if hasattr(data, "model_dump"): + try: + return data.model_dump() + except Exception as e: + logger.warning("Could not convert pydantic data to JSON: %s", e) + return data + if isinstance(data, list): + return list(_normalize_data(x) for x in data) + if isinstance(data, dict): + return {k: _normalize_data(v) for (k, v) in data.items()} + return data + + +def set_data_normalized(span, key, value): + # type: (Span, str, Any) -> None + span.set_data(key, _normalize_data(value)) + + def _calculate_chat_completion_usage( messages, response, span, streaming_message_responses=None ): @@ -112,11 +134,11 @@ def _calculate_chat_completion_usage( total_tokens = prompt_tokens + completion_tokens if completion_tokens != 0: - span.set_data(COMPLETION_TOKENS_USED, completion_tokens) + set_data_normalized(span, COMPLETION_TOKENS_USED, completion_tokens) if prompt_tokens != 0: - span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) if total_tokens != 0: - span.set_data(TOTAL_TOKENS_USED, total_tokens) + set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) def _wrap_chat_completion_create(f): @@ -160,14 +182,17 @@ def new_chat_completion(*args, **kwargs): with capture_internal_exceptions(): if _should_send_default_pii() and integration.include_prompts: - span.set_data("ai.input_messages", messages) - span.set_data("ai.model_id", model) - span.set_data("ai.streaming", streaming) + set_data_normalized(span, "ai.input_messages", messages) + + set_data_normalized(span, "ai.model_id", model) + set_data_normalized(span, "ai.streaming", streaming) if hasattr(res, "choices"): if _should_send_default_pii() and integration.include_prompts: - span.set_data( - "ai.responses", list(map(lambda x: x.message, res.choices)) + set_data_normalized( + span, + "ai.responses", + list(map(lambda x: x.message, res.choices)), ) _calculate_chat_completion_usage(messages, res, span) span.__exit__(None, None, None) @@ -200,7 +225,7 @@ def new_iterator(): _should_send_default_pii() and integration.include_prompts ): - span.set_data("ai.responses", all_responses) + set_data_normalized(span, "ai.responses", all_responses) _calculate_chat_completion_usage( messages, res, span, all_responses ) @@ -208,7 +233,7 @@ def new_iterator(): res._iterator = new_iterator() else: - span.set_data("unknown_response", True) + set_data_normalized(span, "unknown_response", True) span.__exit__(None, None, None) return res @@ -238,15 +263,15 @@ def new_embeddings_create(*args, **kwargs): _should_send_default_pii() and integration.include_prompts ): if isinstance(kwargs["input"], str): - span.set_data("ai.input_messages", [kwargs["input"]]) + set_data_normalized(span, "ai.input_messages", [kwargs["input"]]) elif ( isinstance(kwargs["input"], list) and len(kwargs["input"]) > 0 and isinstance(kwargs["input"][0], str) ): - span.set_data("ai.input_messages", kwargs["input"]) + set_data_normalized(span, "ai.input_messages", kwargs["input"]) if "model" in kwargs: - span.set_data("ai.model_id", kwargs["model"]) + set_data_normalized(span, "ai.model_id", kwargs["model"]) try: response = f(*args, **kwargs) except Exception as e: @@ -271,8 +296,8 @@ def new_embeddings_create(*args, **kwargs): if total_tokens == 0: total_tokens = prompt_tokens - span.set_data(PROMPT_TOKENS_USED, prompt_tokens) - span.set_data(TOTAL_TOKENS_USED, total_tokens) + set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) 
return response diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index ecdedd2694..d9a239e004 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -73,7 +73,7 @@ def test_nonstreaming_chat_completion( if send_default_pii and include_prompts: assert "hello" in span["data"]["ai.input_messages"][0]["content"] - assert "the model response" in span["data"]["ai.responses"][0] + assert "the model response" in span["data"]["ai.responses"][0]["content"] else: assert "ai.input_messages" not in span["data"] assert "ai.responses" not in span["data"] From 1a8db5e99e54265b7bd7c176de10d3f202388bc7 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 15:23:56 +0100 Subject: [PATCH 18/37] Discard open spans after 10 minutes (#2801) OTel spans that are handled in the Sentry span processor can never be finished/closed. This leads to a memory leak. This change makes sure that open spans will be removed from memory after 10 minutes to prevent memory usage from growing constantly. Fixes #2722 --------- Co-authored-by: Daniel Szoke --- .../opentelemetry/span_processor.py | 50 +++++++++- .../opentelemetry/test_span_processor.py | 92 +++++++++++++++++++ 2 files changed, 139 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 0ed4e7f709..0db698e239 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -1,3 +1,5 @@ +from time import time + from opentelemetry.context import get_value # type: ignore from opentelemetry.sdk.trace import SpanProcessor # type: ignore from opentelemetry.semconv.trace import SpanAttributes # type: ignore @@ -33,6 +35,7 @@ from sentry_sdk._types import Event, Hint OPEN_TELEMETRY_CONTEXT = "otel" +SPAN_MAX_TIME_OPEN_MINUTES = 10 def link_trace_context_to_error_event(event, otel_span_map): @@ -76,6 +79,9 @@ class SentrySpanProcessor(SpanProcessor): # type: ignore # The mapping from otel span ids to sentry spans otel_span_map = {} # type: Dict[str, Union[Transaction, SentrySpan]] + # The currently open spans. Elements will be discarded after SPAN_MAX_TIME_OPEN_MINUTES + open_spans = {} # type: dict[int, set[str]] + def __new__(cls): # type: () -> SentrySpanProcessor if not hasattr(cls, "instance"): @@ -90,6 +96,24 @@ def global_event_processor(event, hint): # type: (Event, Hint) -> Event return link_trace_context_to_error_event(event, self.otel_span_map) + def _prune_old_spans(self): + # type: (SentrySpanProcessor) -> None + """ + Prune spans that have been open for too long. 
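+        Spans are tracked in per-minute buckets keyed by their start time;
+        once a bucket is older than SPAN_MAX_TIME_OPEN_MINUTES, all span ids
+        in it are dropped from otel_span_map.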
+ """ + current_time_minutes = int(time() / 60) + for span_start_minutes in list( + self.open_spans.keys() + ): # making a list because we change the dict + # prune empty open spans buckets + if self.open_spans[span_start_minutes] == set(): + self.open_spans.pop(span_start_minutes) + + # prune old buckets + elif current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES: + for span_id in self.open_spans.pop(span_start_minutes): + self.otel_span_map.pop(span_id, None) + def on_start(self, otel_span, parent_context=None): # type: (OTelSpan, Optional[SpanContext]) -> None hub = Hub.current @@ -125,7 +149,9 @@ def on_start(self, otel_span, parent_context=None): sentry_span = sentry_parent_span.start_child( span_id=trace_data["span_id"], description=otel_span.name, - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) else: @@ -135,12 +161,22 @@ def on_start(self, otel_span, parent_context=None): parent_span_id=parent_span_id, trace_id=trace_data["trace_id"], baggage=trace_data["baggage"], - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) self.otel_span_map[trace_data["span_id"]] = sentry_span + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision + self.open_spans.setdefault(span_start_in_minutes, set()).add( + trace_data["span_id"] + ) + self._prune_old_spans() + def on_end(self, otel_span): # type: (OTelSpan) -> None hub = Hub.current @@ -173,7 +209,15 @@ def on_end(self, otel_span): else: self._update_span_with_otel_data(sentry_span, otel_span) - sentry_span.finish(end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9)) + sentry_span.finish( + end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9) + ) # OTel spans have nanosecond precision + + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision + self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id) + self._prune_old_spans() def _is_sentry_span(self, hub, otel_span): # type: (Hub, OTelSpan) -> bool diff --git a/tests/integrations/opentelemetry/test_span_processor.py b/tests/integrations/opentelemetry/test_span_processor.py index b7e5a7928d..02e3059ca8 100644 --- a/tests/integrations/opentelemetry/test_span_processor.py +++ b/tests/integrations/opentelemetry/test_span_processor.py @@ -531,3 +531,95 @@ def test_link_trace_context_to_error_event(): assert "contexts" in event assert "trace" in event["contexts"] assert event["contexts"]["trace"] == fake_trace_context + + +def test_pruning_old_spans_on_start(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + parent_context = {} + fake_client = MagicMock() + fake_client.options = {"instrumenter": "otel"} + fake_client.dsn = "https://1234567890abcdef@o123456.ingest.sentry.io/123456" + + current_hub = MagicMock() + current_hub.client = fake_client + + fake_hub = MagicMock() + fake_hub.current = 
current_hub + + with mock.patch( + "sentry_sdk.integrations.opentelemetry.span_processor.Hub", fake_hub + ): + span_processor = SentrySpanProcessor() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_start(otel_span, parent_context) + assert sorted(list(span_processor.otel_span_map.keys())) == [ + "111111111abcdef", + "1234567890abcdef", + ] + assert sorted(list(span_processor.open_spans.values())) == [ + {"111111111abcdef"}, + {"1234567890abcdef"}, + ] + + +def test_pruning_old_spans_on_end(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + fake_sentry_span = MagicMock(spec=Span) + fake_sentry_span.set_context = MagicMock() + fake_sentry_span.finish = MagicMock() + + span_processor = SentrySpanProcessor() + span_processor._get_otel_context = MagicMock() + span_processor._update_span_with_otel_data = MagicMock() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + "1234567890abcdef": fake_sentry_span, # should go (because it is closed) + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes: {"1234567890abcdef"}, # should go (because it is closed) + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_end(otel_span) + assert sorted(list(span_processor.otel_span_map.keys())) == ["111111111abcdef"] + assert sorted(list(span_processor.open_spans.values())) == [{"111111111abcdef"}] From 5717f1b17e363cc4e3af6b4bfd886158125300ab Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 12 Mar 2024 16:21:24 +0100 Subject: [PATCH 19/37] ref: Event Type (#2753) Implements type hinting for Event via a TypedDict. This commit mainly adjusts type hints; however, there are also some minor code changes to make the code type-safe following the new changes. Some items in the Event could have their types expanded by being defined as TypedDicts themselves. These items have been indicated with TODO comments. 
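To make the new annotations concrete, here is a small sketch (illustration only, not part of this diff; the hook body and the tag it sets are hypothetical) of how user code can now type a `before_send` hook. Note that `Event` must be imported under `TYPE_CHECKING`, since it only exists for type checkers:

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Event and Hint are only defined while type checking.
    from sentry_sdk._types import Event, Hint


def before_send(event, hint):
    # type: (Event, Hint) -> Optional[Event]
    # "tags" is typed as MutableMapping[str, str], so mypy would now flag
    # a misspelled key or a non-string value here.
    tags = event.setdefault("tags", {})
    tags["component"] = "checkout"  # hypothetical tag
    return event
```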
Fixes GH-2357 --- sentry_sdk/_types.py | 64 ++++++++++++++++++- sentry_sdk/api.py | 5 +- sentry_sdk/client.py | 15 +++-- sentry_sdk/crons/api.py | 5 +- sentry_sdk/hub.py | 3 +- sentry_sdk/integrations/_wsgi_common.py | 3 +- sentry_sdk/integrations/aiohttp.py | 9 ++- sentry_sdk/integrations/ariadne.py | 6 +- sentry_sdk/integrations/bottle.py | 2 +- sentry_sdk/integrations/django/__init__.py | 4 +- sentry_sdk/integrations/django/asgi.py | 4 +- sentry_sdk/integrations/falcon.py | 6 +- sentry_sdk/integrations/fastapi.py | 5 +- sentry_sdk/integrations/flask.py | 6 +- sentry_sdk/integrations/gnu_backtrace.py | 6 +- sentry_sdk/integrations/gql.py | 4 +- sentry_sdk/integrations/graphene.py | 3 +- sentry_sdk/integrations/logging.py | 7 +- sentry_sdk/integrations/modules.py | 4 +- sentry_sdk/integrations/pyramid.py | 4 +- sentry_sdk/integrations/quart.py | 7 +- sentry_sdk/integrations/rq.py | 14 ++-- sentry_sdk/integrations/spark/spark_worker.py | 2 +- sentry_sdk/integrations/starlette.py | 9 +-- sentry_sdk/integrations/starlite.py | 6 +- sentry_sdk/integrations/stdlib.py | 2 +- sentry_sdk/integrations/strawberry.py | 18 +++--- sentry_sdk/integrations/tornado.py | 6 +- sentry_sdk/integrations/wsgi.py | 4 +- sentry_sdk/profiler.py | 4 +- sentry_sdk/scope.py | 21 +++--- sentry_sdk/tracing.py | 4 +- sentry_sdk/utils.py | 10 +-- 33 files changed, 176 insertions(+), 96 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 2536541072..49bffb3416 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -9,6 +9,10 @@ if TYPE_CHECKING: + from collections.abc import MutableMapping + + from datetime import datetime + from types import TracebackType from typing import Any from typing import Callable @@ -19,13 +23,69 @@ from typing import Tuple from typing import Type from typing import Union - from typing_extensions import Literal + from typing_extensions import Literal, TypedDict + + # "critical" is an alias of "fatal" recognized by Relay + LogLevelStr = Literal["fatal", "critical", "error", "warning", "info", "debug"] + + Event = TypedDict( + "Event", + { + "breadcrumbs": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "check_in_id": str, + "contexts": dict[str, dict[str, object]], + "dist": str, + "duration": Optional[float], + "environment": str, + "errors": list[dict[str, Any]], # TODO: We can expand on this type + "event_id": str, + "exception": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "extra": MutableMapping[str, object], + "fingerprint": list[str], + "level": LogLevelStr, + "logentry": Mapping[str, object], + "logger": str, + "measurements": dict[str, object], + "message": str, + "modules": dict[str, str], + "monitor_config": Mapping[str, object], + "monitor_slug": Optional[str], + "platform": Literal["python"], + "profile": object, # Should be sentry_sdk.profiler.Profile, but we can't import that here due to circular imports + "release": str, + "request": dict[str, object], + "sdk": Mapping[str, object], + "server_name": str, + "spans": list[dict[str, object]], + "stacktrace": dict[ + str, object + ], # We access this key in the code, but I am unsure whether we ever set it + "start_timestamp": datetime, + "status": Optional[str], + "tags": MutableMapping[ + str, str + ], # Tags must be less than 200 characters each + "threads": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "timestamp": Optional[datetime], # Must be set before sending the 
event + "transaction": str, + "transaction_info": Mapping[str, Any], # TODO: We can expand on this type + "type": Literal["check_in", "transaction"], + "user": dict[str, object], + "_metrics_summary": dict[str, object], + }, + total=False, + ) ExcInfo = Tuple[ Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType] ] - Event = Dict[str, Any] Hint = Dict[str, Any] Breadcrumb = Dict[str, Any] diff --git a/sentry_sdk/api.py b/sentry_sdk/api.py index 1b56571bfa..3148c43f1a 100644 --- a/sentry_sdk/api.py +++ b/sentry_sdk/api.py @@ -22,6 +22,7 @@ BreadcrumbHint, ExcInfo, MeasurementUnit, + LogLevelStr, ) from sentry_sdk.tracing import Span @@ -91,7 +92,7 @@ def capture_event( @hubmethod def capture_message( message, # type: str - level=None, # type: Optional[str] + level=None, # type: Optional[LogLevelStr] scope=None, # type: Optional[Any] **scope_kwargs # type: Any ): @@ -189,7 +190,7 @@ def set_user(value): @scopemethod def set_level(value): - # type: (str) -> None + # type: (LogLevelStr) -> None return Hub.current.scope.set_level(value) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 64e65a8cb6..296de71804 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1,3 +1,8 @@ +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping # type: ignore[attr-defined] + from importlib import import_module import os import uuid @@ -38,7 +43,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope -from sentry_sdk.profiler import has_profiling_enabled, setup_profiler +from sentry_sdk.profiler import has_profiling_enabled, Profile, setup_profiler from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor from sentry_sdk.spotlight import setup_spotlight @@ -393,7 +398,7 @@ def _prepare_event( for key in "release", "environment", "server_name", "dist": if event.get(key) is None and self.options[key] is not None: - event[key] = text_type(self.options[key]).strip() + event[key] = text_type(self.options[key]).strip() # type: ignore[literal-required] if event.get("sdk") is None: sdk_info = dict(SDK_INFO) sdk_info["integrations"] = sorted(self.integrations.keys()) @@ -567,7 +572,7 @@ def _update_session_from_event( errored = True for error in exceptions: mechanism = error.get("mechanism") - if mechanism and mechanism.get("handled") is False: + if isinstance(mechanism, Mapping) and mechanism.get("handled") is False: crashed = True break @@ -659,7 +664,7 @@ def capture_event( headers = { "event_id": event_opt["event_id"], "sent_at": format_timestamp(datetime_utcnow()), - } + } # type: dict[str, object] if dynamic_sampling_context: headers["trace"] = dynamic_sampling_context @@ -667,7 +672,7 @@ def capture_event( envelope = Envelope(headers=headers) if is_transaction: - if profile is not None: + if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) envelope.add_transaction(event_opt) elif is_checkin: diff --git a/sentry_sdk/crons/api.py b/sentry_sdk/crons/api.py index cd240a7dcd..92d113a924 100644 --- a/sentry_sdk/crons/api.py +++ b/sentry_sdk/crons/api.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Optional + from sentry_sdk._types import Event def _create_check_in_event( @@ -15,7 +16,7 @@ def _create_check_in_event( duration_s=None, monitor_config=None, ): - # type: (Optional[str], Optional[str], Optional[str], Optional[float], Optional[Dict[str, Any]]) -> 
Dict[str, Any] + # type: (Optional[str], Optional[str], Optional[str], Optional[float], Optional[Dict[str, Any]]) -> Event options = Hub.current.client.options if Hub.current.client else {} check_in_id = check_in_id or uuid.uuid4().hex # type: str @@ -27,7 +28,7 @@ def _create_check_in_event( "duration": duration_s, "environment": options.get("environment", None), "release": options.get("release", None), - } + } # type: Event if monitor_config: check_in["monitor_config"] = monitor_config diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index c339528821..a716d33433 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -40,6 +40,7 @@ Breadcrumb, BreadcrumbHint, ExcInfo, + LogLevelStr, ) from sentry_sdk.consts import ClientConstructor @@ -335,7 +336,7 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs): return last_event_id def capture_message(self, message, level=None, scope=None, **scope_kwargs): - # type: (str, Optional[str], Optional[Scope], Any) -> Optional[str] + # type: (str, Optional[LogLevelStr], Optional[Scope], Any) -> Optional[str] """ Captures a message. diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 5a41654498..b72ebde126 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -22,6 +22,7 @@ from typing import Dict from typing import Optional from typing import Union + from sentry_sdk._types import Event SENSITIVE_ENV_KEYS = ( @@ -59,7 +60,7 @@ def __init__(self, request): self.request = request def extract_into_event(self, event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None client = Hub.current.client if client is None: return diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index e51bdeeac3..19974030ed 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -48,13 +48,12 @@ from aiohttp import TraceRequestStartParams, TraceRequestEndParams from types import SimpleNamespace from typing import Any - from typing import Dict from typing import Optional from typing import Tuple from typing import Union from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor TRANSACTION_STYLE_VALUES = ("handler_name", "method_and_path_pattern") @@ -256,10 +255,10 @@ async def on_request_end(session, trace_config_ctx, params): def _make_request_processor(weak_request): # type: (weakref.ReferenceType[Request]) -> EventProcessor def aiohttp_processor( - event, # type: Dict[str, Any] - hint, # type: Dict[str, Tuple[type, BaseException, Any]] + event, # type: Event + hint, # type: dict[str, Tuple[type, BaseException, Any]] ): - # type: (...) -> Dict[str, Any] + # type: (...) 
-> Event request = weak_request() if request is None: return event diff --git a/sentry_sdk/integrations/ariadne.py b/sentry_sdk/integrations/ariadne.py index 86d6b5e28e..5b98a88443 100644 --- a/sentry_sdk/integrations/ariadne.py +++ b/sentry_sdk/integrations/ariadne.py @@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional from ariadne.types import GraphQLError, GraphQLResult, GraphQLSchema, QueryParser # type: ignore from graphql.language.ast import DocumentNode # type: ignore - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor class AriadneIntegration(Integration): @@ -131,7 +131,7 @@ def _make_request_event_processor(data): """Add request data and api_target to events.""" def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event if not isinstance(data, dict): return event @@ -163,7 +163,7 @@ def _make_response_event_processor(response): """Add response data to the event's response context.""" def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii() and response.get("errors"): contexts = event.setdefault("contexts", {}) diff --git a/sentry_sdk/integrations/bottle.py b/sentry_sdk/integrations/bottle.py index cc6360daa3..6f3678466e 100644 --- a/sentry_sdk/integrations/bottle.py +++ b/sentry_sdk/integrations/bottle.py @@ -200,7 +200,7 @@ def _make_request_event_processor(app, request, integration): # type: (Bottle, LocalRequest, BottleIntegration) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event _set_transaction_name_and_source(event, integration.transaction_style, request) with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index 426565e645..98834a4693 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -472,7 +472,7 @@ def sentry_patched_get_response(self, request): def _make_wsgi_request_event_processor(weak_request, integration): # type: (Callable[[], WSGIRequest], DjangoIntegration) -> EventProcessor def wsgi_request_event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. 
@@ -570,7 +570,7 @@ def parsed_body(self): def _set_user_info(request, event): - # type: (WSGIRequest, Dict[str, Any]) -> None + # type: (WSGIRequest, Event) -> None user_info = event.setdefault("user", {}) user = getattr(request, "user", None) diff --git a/sentry_sdk/integrations/django/asgi.py b/sentry_sdk/integrations/django/asgi.py index 18f6a58811..e1ba678011 100644 --- a/sentry_sdk/integrations/django/asgi.py +++ b/sentry_sdk/integrations/django/asgi.py @@ -26,13 +26,13 @@ from django.core.handlers.asgi import ASGIRequest from django.http.response import HttpResponse - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor def _make_asgi_request_event_processor(request): # type: (ASGIRequest) -> EventProcessor def asgi_request_event_processor(event, hint): - # type: (dict[str, Any], dict[str, Any]) -> dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. diff --git a/sentry_sdk/integrations/falcon.py b/sentry_sdk/integrations/falcon.py index 3fab11cfeb..d5e2480485 100644 --- a/sentry_sdk/integrations/falcon.py +++ b/sentry_sdk/integrations/falcon.py @@ -18,7 +18,7 @@ from typing import Dict from typing import Optional - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor # In Falcon 3.0 `falcon.api_helpers` is renamed to `falcon.app_helpers` # and `falcon.API` to `falcon.App` @@ -258,7 +258,7 @@ def _has_http_5xx_status(response): def _set_transaction_name_and_source(event, transaction_style, request): - # type: (Dict[str, Any], str, falcon.Request) -> None + # type: (Event, str, falcon.Request) -> None name_for_style = { "uri_template": request.uri_template, "path": request.path, @@ -271,7 +271,7 @@ def _make_request_event_processor(req, integration): # type: (falcon.Request, FalconIntegration) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event _set_transaction_name_and_source(event, integration.transaction_style, req) with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/fastapi.py b/sentry_sdk/integrations/fastapi.py index 6fbe53b92b..33a5591cc4 100644 --- a/sentry_sdk/integrations/fastapi.py +++ b/sentry_sdk/integrations/fastapi.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from typing import Any, Callable, Dict from sentry_sdk.scope import Scope + from sentry_sdk._types import Event try: from sentry_sdk.integrations.starlette import ( @@ -111,9 +112,9 @@ async def _sentry_app(*args, **kwargs): info = await extractor.extract_request_info() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, Dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event # Extract information from request request_info = event.get("request", {}) diff --git a/sentry_sdk/integrations/flask.py b/sentry_sdk/integrations/flask.py index 453ab48ce3..f0bc3d7750 100644 --- a/sentry_sdk/integrations/flask.py +++ b/sentry_sdk/integrations/flask.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from typing import Any, Callable, Dict, Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor from 
sentry_sdk.integrations.wsgi import _ScopedResponse from werkzeug.datastructures import FileStorage, ImmutableMultiDict @@ -172,7 +172,7 @@ def _make_request_event_processor(app, request, integration): # type: (Flask, Callable[[], Request], FlaskIntegration) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to @@ -211,7 +211,7 @@ def _capture_exception(sender, exception, **kwargs): def _add_user_to_event(event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None if flask_login is None: return diff --git a/sentry_sdk/integrations/gnu_backtrace.py b/sentry_sdk/integrations/gnu_backtrace.py index ad9c437878..f8321a6cd7 100644 --- a/sentry_sdk/integrations/gnu_backtrace.py +++ b/sentry_sdk/integrations/gnu_backtrace.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict + from sentry_sdk._types import Event MODULE_RE = r"[a-zA-Z0-9/._:\\-]+" @@ -42,13 +42,13 @@ def setup_once(): # type: () -> None @add_global_event_processor def process_gnu_backtrace(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): return _process_gnu_backtrace(event, hint) def _process_gnu_backtrace(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event if Hub.current.get_integration(GnuBacktraceIntegration) is None: return event diff --git a/sentry_sdk/integrations/gql.py b/sentry_sdk/integrations/gql.py index 79fc8d022f..9db6632a4a 100644 --- a/sentry_sdk/integrations/gql.py +++ b/sentry_sdk/integrations/gql.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Tuple, Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor EventDataType = Dict[str, Union[str, Tuple[VariableDefinitionNode, ...]]] @@ -112,7 +112,7 @@ def sentry_patched_execute(self, document, *args, **kwargs): def _make_gql_event_processor(client, document): # type: (gql.Client, DocumentNode) -> EventProcessor def processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event try: errors = hint["exc_info"][1].errors except (AttributeError, KeyError): diff --git a/sentry_sdk/integrations/graphene.py b/sentry_sdk/integrations/graphene.py index fa753d0812..b9c3b26018 100644 --- a/sentry_sdk/integrations/graphene.py +++ b/sentry_sdk/integrations/graphene.py @@ -19,6 +19,7 @@ from graphene.language.source import Source # type: ignore from graphql.execution import ExecutionResult # type: ignore from graphql.type import GraphQLSchema # type: ignore + from sentry_sdk._types import Event class GrapheneIntegration(Integration): @@ -100,7 +101,7 @@ async def _sentry_patched_graphql_async(schema, source, *args, **kwargs): def _event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event if _should_send_default_pii(): request_info = event.setdefault("request", {}) request_info["api_target"] = "graphql" diff --git a/sentry_sdk/integrations/logging.py b/sentry_sdk/integrations/logging.py index ee6bb8e1d1..d455983fc5 100644 --- a/sentry_sdk/integrations/logging.py +++ b/sentry_sdk/integrations/logging.py @@ -16,6 +16,7 @@ from sentry_sdk._types import 
TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import MutableMapping from logging import LogRecord from typing import Any from typing import Dict @@ -156,7 +157,7 @@ def _logging_to_event_level(self, record): ) def _extra_from_record(self, record): - # type: (LogRecord) -> Dict[str, None] + # type: (LogRecord) -> MutableMapping[str, object] return { k: v for k, v in iteritems(vars(record)) @@ -225,7 +226,9 @@ def _emit(self, record): hint["log_record"] = record - event["level"] = self._logging_to_event_level(record) + level = self._logging_to_event_level(record) + if level in {"debug", "info", "warning", "error", "critical", "fatal"}: + event["level"] = level # type: ignore[typeddict-item] event["logger"] = record.name # Log records from `warnings` module as separate issues diff --git a/sentry_sdk/integrations/modules.py b/sentry_sdk/integrations/modules.py index 5b595b4032..fa0fbf8936 100644 --- a/sentry_sdk/integrations/modules.py +++ b/sentry_sdk/integrations/modules.py @@ -9,8 +9,6 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict - from sentry_sdk._types import Event @@ -22,7 +20,7 @@ def setup_once(): # type: () -> None @add_global_event_processor def processor(event, hint): - # type: (Event, Any) -> Dict[str, Any] + # type: (Event, Any) -> Event if event.get("type") == "transaction": return event diff --git a/sentry_sdk/integrations/pyramid.py b/sentry_sdk/integrations/pyramid.py index 80750f0268..3b9b2fdb96 100644 --- a/sentry_sdk/integrations/pyramid.py +++ b/sentry_sdk/integrations/pyramid.py @@ -36,7 +36,7 @@ from webob.compat import cgi_FieldStorage # type: ignore from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor if getattr(Request, "authenticated_userid", None): @@ -216,7 +216,7 @@ def size_of_file(self, postdata): def _make_event_processor(weak_request, integration): # type: (Callable[[], Request], PyramidIntegration) -> EventProcessor def pyramid_event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event request = weak_request() if request is None: return event diff --git a/sentry_sdk/integrations/quart.py b/sentry_sdk/integrations/quart.py index 4dee751d65..8803fa7cea 100644 --- a/sentry_sdk/integrations/quart.py +++ b/sentry_sdk/integrations/quart.py @@ -20,10 +20,9 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict from typing import Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor try: import quart_auth # type: ignore @@ -186,7 +185,7 @@ async def _request_websocket_started(app, **kwargs): def _make_request_event_processor(app, request, integration): # type: (Quart, Request, QuartIntegration) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. 
@@ -231,7 +230,7 @@ async def _capture_exception(sender, exception, **kwargs): def _add_user_to_event(event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None if quart_auth is None: return diff --git a/sentry_sdk/integrations/rq.py b/sentry_sdk/integrations/rq.py index b5eeb0be85..2b32e59880 100644 --- a/sentry_sdk/integrations/rq.py +++ b/sentry_sdk/integrations/rq.py @@ -27,9 +27,9 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, Dict + from typing import Any, Callable - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor from sentry_sdk.utils import ExcInfo from rq.job import Job @@ -126,12 +126,12 @@ def sentry_patched_enqueue_job(self, job, **kwargs): def _make_event_processor(weak_job): # type: (Callable[[], Job]) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event job = weak_job() if job is not None: with capture_internal_exceptions(): extra = event.setdefault("extra", {}) - extra["rq-job"] = { + rq_job = { "job_id": job.id, "func": job.func_name, "args": job.args, @@ -140,9 +140,11 @@ def event_processor(event, hint): } if job.enqueued_at: - extra["rq-job"]["enqueued_at"] = format_timestamp(job.enqueued_at) + rq_job["enqueued_at"] = format_timestamp(job.enqueued_at) if job.started_at: - extra["rq-job"]["started_at"] = format_timestamp(job.started_at) + rq_job["started_at"] = format_timestamp(job.started_at) + + extra["rq-job"] = rq_job if "exc_info" in hint: with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/spark/spark_worker.py b/sentry_sdk/integrations/spark/spark_worker.py index cd4eb0f28b..632e870973 100644 --- a/sentry_sdk/integrations/spark/spark_worker.py +++ b/sentry_sdk/integrations/spark/spark_worker.py @@ -58,7 +58,7 @@ def _capture_exception(exc_info, hub): if rv: rv.reverse() hint = event_hint_with_exc_info(exc_info) - event = {"level": "error", "exception": {"values": rv}} + event = {"level": "error", "exception": {"values": rv}} # type: Event _tag_task_context() diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index ed95c757f1..79bb18aa78 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -32,6 +32,7 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Tuple from sentry_sdk.scope import Scope as SentryScope + from sentry_sdk._types import Event try: import starlette # type: ignore @@ -407,9 +408,9 @@ async def _sentry_async_func(*args, **kwargs): info = await extractor.extract_request_info() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event # Add info from request to event request_info = event.get("request", {}) @@ -455,9 +456,9 @@ def _sentry_sync_func(*args, **kwargs): cookies = extractor.extract_cookies_from_request() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # 
Extract information from request request_info = event.get("request", {}) diff --git a/sentry_sdk/integrations/starlite.py b/sentry_sdk/integrations/starlite.py index 3900ce8c8a..070675c2e7 100644 --- a/sentry_sdk/integrations/starlite.py +++ b/sentry_sdk/integrations/starlite.py @@ -219,7 +219,11 @@ def event_processor(event: "Event", _: "Dict[str, Any]") -> "Event": tx_info = {"source": TRANSACTION_SOURCE_ROUTE} event.update( - request=request_info, transaction=tx_name, transaction_info=tx_info + { + "request": request_info, + "transaction": tx_name, + "transaction_info": tx_info, + } ) return event diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index a5c3bfb2ae..0a17834a40 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -39,7 +39,7 @@ "name": platform.python_implementation(), "version": "%s.%s.%s" % (sys.version_info[:3]), "build": sys.version, -} +} # type: dict[str, object] class StdlibIntegration(Integration): diff --git a/sentry_sdk/integrations/strawberry.py b/sentry_sdk/integrations/strawberry.py index 8f4314f663..3d450e0692 100644 --- a/sentry_sdk/integrations/strawberry.py +++ b/sentry_sdk/integrations/strawberry.py @@ -29,11 +29,11 @@ raise DidNotEnable("strawberry-graphql is not installed") if TYPE_CHECKING: - from typing import Any, Callable, Dict, Generator, List, Optional + from typing import Any, Callable, Generator, List, Optional from graphql import GraphQLError, GraphQLResolveInfo # type: ignore from strawberry.http import GraphQLHTTPResponse from strawberry.types import ExecutionContext, ExecutionResult # type: ignore - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor ignore_logger("strawberry.execution") @@ -349,21 +349,21 @@ def _make_request_event_processor(execution_context): # type: (ExecutionContext) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii(): request_data = event.setdefault("request", {}) request_data["api_target"] = "graphql" if not request_data.get("data"): - request_data["data"] = {"query": execution_context.query} + data = {"query": execution_context.query} if execution_context.variables: - request_data["data"]["variables"] = execution_context.variables + data["variables"] = execution_context.variables if execution_context.operation_name: - request_data["data"][ - "operationName" - ] = execution_context.operation_name + data["operationName"] = execution_context.operation_name + + request_data["data"] = data else: try: @@ -380,7 +380,7 @@ def _make_response_event_processor(response_data): # type: (GraphQLHTTPResponse) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii(): contexts = event.setdefault("contexts", {}) diff --git a/sentry_sdk/integrations/tornado.py b/sentry_sdk/integrations/tornado.py index 8af93c47f3..c6f7700f12 100644 --- a/sentry_sdk/integrations/tornado.py +++ b/sentry_sdk/integrations/tornado.py @@ -41,7 +41,7 @@ from typing import Callable from typing import Generator - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor class TornadoIntegration(Integration): @@ -155,7 +155,7 @@ def _capture_exception(ty, value, tb): def 
_make_event_processor(weak_handler): # type: (Callable[[], RequestHandler]) -> EventProcessor def tornado_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event handler = weak_handler() if handler is None: return event @@ -164,7 +164,7 @@ def tornado_processor(event, hint): with capture_internal_exceptions(): method = getattr(handler, handler.request.method.lower()) - event["transaction"] = transaction_from_function(method) + event["transaction"] = transaction_from_function(method) or "" event["transaction_info"] = {"source": TRANSACTION_SOURCE_COMPONENT} with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index 0d53766efb..e7fd0da66d 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py @@ -27,7 +27,7 @@ from typing import Protocol from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor WsgiResponseIter = TypeVar("WsgiResponseIter") WsgiResponseHeaders = TypeVar("WsgiResponseHeaders") @@ -254,7 +254,7 @@ def _make_wsgi_event_processor(environ, use_x_forwarded_for): headers = _filter_headers(dict(_get_headers(environ))) def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event with capture_internal_exceptions(): # if the code below fails halfway through we at least have some data request_info = event.setdefault("request", {}) diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index be954b2a2c..ef4868f745 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -62,7 +62,7 @@ from typing_extensions import TypedDict import sentry_sdk.tracing - from sentry_sdk._types import SamplingContext, ProfilerMode + from sentry_sdk._types import Event, SamplingContext, ProfilerMode ThreadId = str @@ -673,7 +673,7 @@ def process(self): } def to_json(self, event_opt, options): - # type: (Any, Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Dict[str, Any] profile = self.process() set_in_app_in_frames( diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index b0dcca8b15..80537cd8bf 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -33,6 +33,8 @@ ) if TYPE_CHECKING: + from collections.abc import MutableMapping + from typing import Any from typing import Callable from typing import Deque @@ -53,6 +55,7 @@ EventProcessor, ExcInfo, Hint, + LogLevelStr, Type, ) @@ -414,15 +417,15 @@ def iter_trace_propagation_headers(self, *args, **kwargs): def clear(self): # type: () -> None """Clears the entire scope.""" - self._level = None # type: Optional[str] + self._level = None # type: Optional[LogLevelStr] self._fingerprint = None # type: Optional[List[str]] self._transaction = None # type: Optional[str] - self._transaction_info = {} # type: Dict[str, str] + self._transaction_info = {} # type: MutableMapping[str, str] self._user = None # type: Optional[Dict[str, Any]] self._tags = {} # type: Dict[str, Any] self._contexts = {} # type: Dict[str, Dict[str, Any]] - self._extras = {} # type: Dict[str, Any] + self._extras = {} # type: MutableMapping[str, Any] self._attachments = [] # type: List[Attachment] self.clear_breadcrumbs() @@ -438,12 +441,12 @@ def clear(self): @_attr_setter def level(self, value): - # type: (Optional[str]) -> None + # type: (Optional[LogLevelStr]) -> None """When set this overrides the level. 
Deprecated in favor of set_level.""" self._level = value def set_level(self, value): - # type: (Optional[str]) -> None + # type: (Optional[LogLevelStr]) -> None """Sets the level for the scope.""" self._level = value @@ -848,7 +851,7 @@ def capture_event(self, event, hint=None, client=None, scope=None, **scope_kwarg def capture_message( self, message, level=None, client=None, scope=None, **scope_kwargs ): - # type: (str, Optional[str], Optional[sentry_sdk.Client], Optional[Scope], Any) -> Optional[str] + # type: (str, Optional[LogLevelStr], Optional[sentry_sdk.Client], Optional[Scope], Any) -> Optional[str] """ Captures a message. @@ -876,7 +879,7 @@ def capture_message( event = { "message": message, "level": level, - } + } # type: Event return self.capture_event(event, client=client, scope=scope, **scope_kwargs) @@ -1079,7 +1082,7 @@ def _apply_contexts_to_event(self, event, hint, options): # Add "reply_id" context try: - replay_id = contexts["trace"]["dynamic_sampling_context"]["replay_id"] + replay_id = contexts["trace"]["dynamic_sampling_context"]["replay_id"] # type: ignore except (KeyError, TypeError): replay_id = None @@ -1192,7 +1195,7 @@ def update_from_scope(self, scope): def update_from_kwargs( self, user=None, # type: Optional[Any] - level=None, # type: Optional[str] + level=None, # type: Optional[LogLevelStr] extras=None, # type: Optional[Dict[str, Any]] contexts=None, # type: Optional[Dict[str, Any]] tags=None, # type: Optional[Dict[str, str]] diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 80e9ace939..bac1ceaa60 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: import typing - from collections.abc import Callable + from collections.abc import Callable, MutableMapping from typing import Any from typing import Dict from typing import Iterator @@ -151,7 +151,7 @@ def __init__( self.description = description self.status = status self.hub = hub - self._tags = {} # type: Dict[str, str] + self._tags = {} # type: MutableMapping[str, str] self._data = {} # type: Dict[str, Any] self._containing_transaction = containing_transaction if start_timestamp is None: diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 7c10d7cf43..150130a057 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -75,7 +75,7 @@ Union, ) - from sentry_sdk._types import EndpointType, ExcInfo + from sentry_sdk._types import EndpointType, Event, ExcInfo epoch = datetime(1970, 1, 1) @@ -975,7 +975,7 @@ def to_string(value): def iter_event_stacktraces(event): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Event) -> Iterator[Dict[str, Any]] if "stacktrace" in event: yield event["stacktrace"] if "threads" in event: @@ -989,14 +989,14 @@ def iter_event_stacktraces(event): def iter_event_frames(event): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Event) -> Iterator[Dict[str, Any]] for stacktrace in iter_event_stacktraces(event): for frame in stacktrace.get("frames") or (): yield frame def handle_in_app(event, in_app_exclude=None, in_app_include=None, project_root=None): - # type: (Dict[str, Any], Optional[List[str]], Optional[List[str]], Optional[str]) -> Dict[str, Any] + # type: (Event, Optional[List[str]], Optional[List[str]], Optional[str]) -> Event for stacktrace in iter_event_stacktraces(event): set_in_app_in_frames( stacktrace.get("frames"), @@ -1074,7 +1074,7 @@ def event_from_exception( client_options=None, # type: Optional[Dict[str, Any]] mechanism=None, # type: Optional[Dict[str, Any]] ): - # 
type: (...) -> Tuple[Dict[str, Any], Dict[str, Any]] + # type: (...) -> Tuple[Event, Dict[str, Any]] exc_info = exc_info_from_error(exc_info) hint = event_hint_with_exc_info(exc_info) return ( From e7535c112ac6a6e8e166697a0a5313055fb04f6a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:15:11 +0000 Subject: [PATCH 20/37] build(deps): bump types-protobuf from 4.24.0.20240302 to 4.24.0.20240311 (#2797) Bumps [types-protobuf](https://github.com/python/typeshed) from 4.24.0.20240302 to 4.24.0.20240311. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-protobuf dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Anton Pirker Co-authored-by: Ivana Kellyerova --- linter-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linter-requirements.txt b/linter-requirements.txt index 42a0313e31..c390f5fe70 100644 --- a/linter-requirements.txt +++ b/linter-requirements.txt @@ -2,7 +2,7 @@ mypy black flake8==5.0.4 # flake8 depends on pyflakes>=3.0.0 and this dropped support for Python 2 "# type:" comments types-certifi -types-protobuf==4.24.0.20240302 # newer raises an error on mypy sentry_sdk +types-protobuf==4.24.0.20240311 # newer raises an error on mypy sentry_sdk types-redis types-setuptools pymongo # There is no separate types module. From 8f9d49e26974253acf8eec03b6b9b730240bbf0f Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Wed, 13 Mar 2024 12:08:23 +0000 Subject: [PATCH 21/37] release: 1.42.0 --- CHANGELOG.md | 12 ++++++++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cef63eab1b..f845470e19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 1.42.0 + +### Various fixes & improvements + +- build(deps): bump types-protobuf from 4.24.0.20240302 to 4.24.0.20240311 (#2797) by @dependabot +- ref: Event Type (#2753) by @szokeasaurusrex +- Discard open spans after 10 minutes (#2801) by @antonpirker +- Add a method for normalizing data passed to set_data (#2800) by @colin-sentry +- OpenAI integration (#2791) by @colin-sentry +- Propagate sentry-trace and baggage to huey tasks (#2792) by @cnschn +- ref: Improve scrub_dict typing (#2768) by @szokeasaurusrex + ## 1.41.0 ### Various fixes & improvements diff --git a/docs/conf.py b/docs/conf.py index 8a53738e61..48bf8dc82e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "1.41.0" +release = "1.42.0" version = ".".join(release.split(".")[:2]) # The short X.Y version. 
diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index e4edfddef1..83076c762f 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -320,4 +320,4 @@ def _get_default_options():
 del _get_default_options
 
-VERSION = "1.41.0"
+VERSION = "1.42.0"
diff --git a/setup.py b/setup.py
index 0299bf91fb..f17ee954b1 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ def get_file_text(file_name):
 
 setup(
     name="sentry-sdk",
-    version="1.41.0",
+    version="1.42.0",
     author="Sentry Team and Contributors",
     author_email="hello@sentry.io",
     url="https://github.com/getsentry/sentry-python",

From d27c5cddec3e37829028bb48feda4134288b886a Mon Sep 17 00:00:00 2001
From: Anton Pirker 
Date: Wed, 13 Mar 2024 13:17:05 +0100
Subject: [PATCH 22/37] Update changelog

---
 CHANGELOG.md | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f845470e19..84708cd6ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,13 +4,35 @@

 ### Various fixes & improvements

-- build(deps): bump types-protobuf from 4.24.0.20240302 to 4.24.0.20240311 (#2797) by @dependabot
-- ref: Event Type (#2753) by @szokeasaurusrex
-- Discard open spans after 10 minutes (#2801) by @antonpirker
-- Add a method for normalizing data passed to set_data (#2800) by @colin-sentry
-- OpenAI integration (#2791) by @colin-sentry
-- Propagate sentry-trace and baggage to huey tasks (#2792) by @cnschn
-- ref: Improve scrub_dict typing (#2768) by @szokeasaurusrex
+- **New integration:** [OpenAI integration](https://docs.sentry.io/platforms/python/integrations/openai/) (#2791) by @colin-sentry
+
+  We added an integration for OpenAI to capture errors and also performance data when using the OpenAI Python SDK.
+
+  Usage:
+
+  This integration is auto-enabling, so if you have the `openai` package in your project it will be enabled. Just initialize Sentry before you create your OpenAI client.
+
+  ```python
+  from openai import OpenAI
+
+  import sentry_sdk
+
+  sentry_sdk.init(
+      dsn="___PUBLIC_DSN___",
+      enable_tracing=True,
+      traces_sample_rate=1.0,
+  )
+
+  client = OpenAI()
+  ```
+
+  For more information, see the documentation for the [OpenAI integration](https://docs.sentry.io/platforms/python/integrations/openai/).
+
+- Discard open OpenTelemetry spans after 10 minutes (#2801) by @antonpirker
+- Propagate sentry-trace and baggage headers to Huey tasks (#2792) by @cnschn
+- Added Event type (#2753) by @szokeasaurusrex
+- Improve scrub_dict typing (#2768) by @szokeasaurusrex
+- Dependencies: bump types-protobuf from 4.24.0.20240302 to 4.24.0.20240311 (#2797) by @dependabot

 ## 1.41.0

From ab0c32e284e0ecb7e8719595e5add3314bbe8292 Mon Sep 17 00:00:00 2001
From: Anton Pirker 
Date: Mon, 18 Mar 2024 09:21:09 +0100
Subject: [PATCH 23/37] Fixed OpenAI tests (#2834)

This will prevent the streaming response OpenAI tests from failing.
---
 tests/integrations/openai/test_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index d9a239e004..074d859274 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -99,7 +99,7 @@ def test_streaming_chat_completion(
     events = capture_events()
 
     client = OpenAI(api_key="z")
-    returned_stream = Stream(cast_to=None, response=None, client=None)
+    returned_stream = Stream(cast_to=None, response=None, client=client)
     returned_stream._iterator = [
         ChatCompletionChunk(
             id="1",

From 9dc517b7dd3224d5d6b708cc87671b2dbda644f5 Mon Sep 17 00:00:00 2001
From: Daniel Szoke 
Date: Mon, 18 Mar 2024 09:44:44 +0100
Subject: [PATCH 24/37] Re-export `Event` in `types.py` (#2829)

End-users may need to use the Event type for their type hinting to work following the Event type changes. However, we define Event in a private module, sentry_sdk._types, which provides no stability guarantees. Therefore, this PR creates a new public module, sentry_sdk.types, where we re-export the Event type and explicitly make it available as public API via sentry_sdk.types.Event. The new sentry_sdk.types module includes a docstring to inform users that we reserve the right to modify types in minor releases, since we consider types to be a form of documentation (they are not enforced by the Python language), but that we guarantee we will only remove type definitions in a major release.

---
 sentry_sdk/types.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 sentry_sdk/types.py

diff --git a/sentry_sdk/types.py b/sentry_sdk/types.py
new file mode 100644
index 0000000000..5c46de7f88
--- /dev/null
+++ b/sentry_sdk/types.py
@@ -0,0 +1,14 @@
+"""
+This module contains type definitions for the Sentry SDK's public API.
+The types are re-exported from the internal module `sentry_sdk._types`.
+
+Disclaimer: Since types are a form of documentation, type definitions
+may change in minor releases. Removing a type would be considered a
+breaking change, and so we will only remove type definitions in major
+releases.
+"""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from sentry_sdk._types import Event, Hint  # noqa: F401

From 9bdd029cc7dd5d4a698e92a0883e601a01d760ee Mon Sep 17 00:00:00 2001
From: Anton Pirker 
Date: Mon, 18 Mar 2024 10:30:12 +0100
Subject: [PATCH 25/37] Small APIdocs improvement (#2828)

This PR makes sure the API docs are always rebuilt from scratch (by deleting any pre-existing docs/_build folder), and it also makes some minor changes to set_level and set_tag so that the types of their parameters are clear.

---
 Makefile            |  1 +
 sentry_sdk/scope.py | 55 +++++++++++++++++++++++++++++--------------
 2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index 32cdbb1fff..ac0ef51f5f 100644
--- a/Makefile
+++ b/Makefile
@@ -51,6 +51,7 @@ lint: .venv
 apidocs: .venv
 	@$(VENV_PATH)/bin/pip install --editable .
 	@$(VENV_PATH)/bin/pip install -U -r ./docs-requirements.txt
+	rm -rf docs/_build
 	@$(VENV_PATH)/bin/sphinx-build -vv -W -b html docs/ docs/_build
 .PHONY: apidocs

diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py
index 80537cd8bf..cd974e4a52 100644
--- a/sentry_sdk/scope.py
+++ b/sentry_sdk/scope.py
@@ -441,13 +441,28 @@ def clear(self):
 
     @_attr_setter
     def level(self, value):
-        # type: (Optional[LogLevelStr]) -> None
-        """When set this overrides the level.
Deprecated in favor of set_level.""" + # type: (LogLevelStr) -> None + """ + When set this overrides the level. + + .. deprecated:: 1.0.0 + Use :func:`set_level` instead. + + :param value: The level to set. + """ + logger.warning( + "Deprecated: use .set_level() instead. This will be removed in the future." + ) + self._level = value def set_level(self, value): - # type: (Optional[LogLevelStr]) -> None - """Sets the level for the scope.""" + # type: (LogLevelStr) -> None + """ + Sets the level for the scope. + + :param value: The level to set. + """ self._level = value @_attr_setter @@ -555,20 +570,24 @@ def profile(self, profile): self._profile = profile - def set_tag( - self, - key, # type: str - value, # type: Any - ): - # type: (...) -> None - """Sets a tag for a key to a specific value.""" + def set_tag(self, key, value): + # type: (str, Any) -> None + """ + Sets a tag for a key to a specific value. + + :param key: Key of the tag to set. + + :param value: Value of the tag to set. + """ self._tags[key] = value - def remove_tag( - self, key # type: str - ): - # type: (...) -> None - """Removes a specific tag.""" + def remove_tag(self, key): + # type: (str) -> None + """ + Removes a specific tag. + + :param key: Key of the tag to remove. + """ self._tags.pop(key, None) def set_context( @@ -577,7 +596,9 @@ def set_context( value, # type: Dict[str, Any] ): # type: (...) -> None - """Binds a context at a certain key to a specific value.""" + """ + Binds a context at a certain key to a specific value. + """ self._contexts[key] = value def remove_context( From 68b9180480388c6bbcc89d65ee56ebe0782f4395 Mon Sep 17 00:00:00 2001 From: Kyle Wigley <9877221+kwigley@users.noreply.github.com> Date: Mon, 18 Mar 2024 12:09:56 -0400 Subject: [PATCH 26/37] feat(integrations): Add support for celery-redbeat cron tasks (#2643) --------- Co-authored-by: Ivana Kellyerova --- sentry_sdk/integrations/celery.py | 62 +++++++++++++++++++ setup.py | 1 + .../celery/test_celery_beat_crons.py | 54 ++++++++++++++++ 3 files changed, 117 insertions(+) diff --git a/sentry_sdk/integrations/celery.py b/sentry_sdk/integrations/celery.py index 0fd983de8d..f2e1aff48a 100644 --- a/sentry_sdk/integrations/celery.py +++ b/sentry_sdk/integrations/celery.py @@ -56,6 +56,11 @@ except ImportError: raise DidNotEnable("Celery not installed") +try: + from redbeat.schedulers import RedBeatScheduler # type: ignore +except ImportError: + RedBeatScheduler = None + CELERY_CONTROL_FLOW_EXCEPTIONS = (Retry, Ignore, Reject) @@ -76,6 +81,7 @@ def __init__( if monitor_beat_tasks: _patch_beat_apply_entry() + _patch_redbeat_maybe_due() _setup_celery_beat_signals() @staticmethod @@ -535,6 +541,62 @@ def sentry_apply_entry(*args, **kwargs): Scheduler.apply_entry = sentry_apply_entry +def _patch_redbeat_maybe_due(): + # type: () -> None + + if RedBeatScheduler is None: + return + + original_maybe_due = RedBeatScheduler.maybe_due + + def sentry_maybe_due(*args, **kwargs): + # type: (*Any, **Any) -> None + scheduler, schedule_entry = args + app = scheduler.app + + celery_schedule = schedule_entry.schedule + monitor_name = schedule_entry.name + + hub = Hub.current + integration = hub.get_integration(CeleryIntegration) + if integration is None: + return original_maybe_due(*args, **kwargs) + + if match_regex_list(monitor_name, integration.exclude_beat_tasks): + return original_maybe_due(*args, **kwargs) + + with hub.configure_scope() as scope: + # When tasks are started from Celery Beat, make sure each task has its own trace. 
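+            # Without a fresh propagation context, check-ins emitted from
+            # the long-lived scheduler process would all attach to the
+            # same trace.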
+ scope.set_new_propagation_context() + + monitor_config = _get_monitor_config(celery_schedule, app, monitor_name) + + is_supported_schedule = bool(monitor_config) + if is_supported_schedule: + headers = schedule_entry.options.pop("headers", {}) + headers.update( + { + "sentry-monitor-slug": monitor_name, + "sentry-monitor-config": monitor_config, + } + ) + + check_in_id = capture_checkin( + monitor_slug=monitor_name, + monitor_config=monitor_config, + status=MonitorStatus.IN_PROGRESS, + ) + headers.update({"sentry-monitor-check-in-id": check_in_id}) + + # Set the Sentry configuration in the options of the ScheduleEntry. + # Those will be picked up in `apply_async` and added to the headers. + schedule_entry.options["headers"] = headers + + return original_maybe_due(*args, **kwargs) + + RedBeatScheduler.maybe_due = sentry_maybe_due + + def _setup_celery_beat_signals(): # type: () -> None task_success.connect(crons_task_success) diff --git a/setup.py b/setup.py index f17ee954b1..b1e9956ada 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ def get_file_text(file_name): "beam": ["apache-beam>=2.12"], "bottle": ["bottle>=0.12.13"], "celery": ["celery>=3"], + "celery-redbeat": ["celery-redbeat>=2"], "chalice": ["chalice>=1.16.0"], "clickhouse-driver": ["clickhouse-driver>=0.2.0"], "django": ["django>=1.8"], diff --git a/tests/integrations/celery/test_celery_beat_crons.py b/tests/integrations/celery/test_celery_beat_crons.py index 9343b3c926..9ffa59b00d 100644 --- a/tests/integrations/celery/test_celery_beat_crons.py +++ b/tests/integrations/celery/test_celery_beat_crons.py @@ -8,6 +8,7 @@ _get_humanized_interval, _get_monitor_config, _patch_beat_apply_entry, + _patch_redbeat_maybe_due, crons_task_success, crons_task_failure, crons_task_retry, @@ -447,3 +448,56 @@ def test_exclude_beat_tasks_option( # The original Scheduler.apply_entry() is called, AND _get_monitor_config is called. assert fake_apply_entry.call_count == 1 assert _get_monitor_config.call_count == 1 + + +@pytest.mark.parametrize( + "task_name,exclude_beat_tasks,task_in_excluded_beat_tasks", + [ + ["some_task_name", ["xxx", "some_task.*"], True], + ["some_task_name", ["xxx", "some_other_task.*"], False], + ], +) +def test_exclude_redbeat_tasks_option( + task_name, exclude_beat_tasks, task_in_excluded_beat_tasks +): + """ + Test excluding Celery RedBeat tasks from automatic instrumentation. + """ + fake_maybe_due = MagicMock() + + fake_redbeat_scheduler = MagicMock() + fake_redbeat_scheduler.maybe_due = fake_maybe_due + + fake_integration = MagicMock() + fake_integration.exclude_beat_tasks = exclude_beat_tasks + + fake_schedule_entry = MagicMock() + fake_schedule_entry.name = task_name + + fake_get_monitor_config = MagicMock() + + with mock.patch( + "sentry_sdk.integrations.celery.RedBeatScheduler", fake_redbeat_scheduler + ) as RedBeatScheduler: # noqa: N806 + with mock.patch( + "sentry_sdk.integrations.celery.Hub.current.get_integration", + return_value=fake_integration, + ): + with mock.patch( + "sentry_sdk.integrations.celery._get_monitor_config", + fake_get_monitor_config, + ) as _get_monitor_config: + # Mimic CeleryIntegration patching of RedBeatScheduler.maybe_due() + _patch_redbeat_maybe_due() + # Mimic Celery RedBeat calling a task from the RedBeat schedule + RedBeatScheduler.maybe_due(fake_redbeat_scheduler, fake_schedule_entry) + + if task_in_excluded_beat_tasks: + # Only the original RedBeatScheduler.maybe_due() is called, _get_monitor_config is NOT called. 
+                    assert fake_maybe_due.call_count == 1
+                    _get_monitor_config.assert_not_called()
+
+                else:
+                    # The original RedBeatScheduler.maybe_due() is called, AND _get_monitor_config is called.
+                    assert fake_maybe_due.call_count == 1
+                    assert _get_monitor_config.call_count == 1

From 8e44430728fee936733b2e1d8c1f0851f528b1a5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 19 Mar 2024 12:28:55 +0000
Subject: [PATCH 27/37] build(deps): bump checkouts/data-schemas from `ed078ed`
 to `8232f17` (#2832)

Bumps [checkouts/data-schemas](https://github.com/getsentry/sentry-data-schemas)
from `ed078ed` to `8232f17`.
- [Commits](https://github.com/getsentry/sentry-data-schemas/compare/ed078ed0bb09b9a5d0f387eaf70e449a5ae51cfd...8232f178ae709232907b783d709f5fba80b26201)

---
updated-dependencies:
- dependency-name: checkouts/data-schemas
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Ivana Kellyerova
---
 checkouts/data-schemas | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/checkouts/data-schemas b/checkouts/data-schemas
index ed078ed0bb..8232f178ae 160000
--- a/checkouts/data-schemas
+++ b/checkouts/data-schemas
@@ -1 +1 @@
-Subproject commit ed078ed0bb09b9a5d0f387eaf70e449a5ae51cfd
+Subproject commit 8232f178ae709232907b783d709f5fba80b26201

From 856e5bce7424c65dc868d95e7d57e7d3dc72decd Mon Sep 17 00:00:00 2001
From: Ivana Kellyerova
Date: Tue, 19 Mar 2024 15:49:51 +0100
Subject: [PATCH 28/37] fix(awslambda): aws_event can be an empty list (#2849)

---
 sentry_sdk/integrations/aws_lambda.py     | 2 +-
 tests/integrations/aws_lambda/test_aws.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/aws_lambda.py b/sentry_sdk/integrations/aws_lambda.py
index 00752e7487..3cefc90cfb 100644
--- a/sentry_sdk/integrations/aws_lambda.py
+++ b/sentry_sdk/integrations/aws_lambda.py
@@ -81,7 +81,7 @@ def sentry_handler(aws_event, aws_context, *args, **kwargs):
         # will be the same for all events in the list, since they're all hitting
         # the lambda in the same request.)
- if isinstance(aws_event, list): + if isinstance(aws_event, list) and len(aws_event) >= 1: request_data = aws_event[0] batch_size = len(aws_event) else: diff --git a/tests/integrations/aws_lambda/test_aws.py b/tests/integrations/aws_lambda/test_aws.py index bea87adce5..5f2dba132d 100644 --- a/tests/integrations/aws_lambda/test_aws.py +++ b/tests/integrations/aws_lambda/test_aws.py @@ -489,6 +489,7 @@ def test_handler(event, context): True, 2, ), + (b"[]", False, 1), ], ) def test_non_dict_event( From a116c55199dfb64f180690bb6eb3c219ca677ca7 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 20 Mar 2024 10:56:12 +0100 Subject: [PATCH 29/37] feat: Add optional `keep_alive` (#2842) --- sentry_sdk/consts.py | 1 + sentry_sdk/transport.py | 35 +++++++++++++++++++++-- tests/test_transport.py | 62 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 83076c762f..6af08b4a40 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -264,6 +264,7 @@ def __init__( ignore_errors=[], # type: Sequence[Union[type, str]] # noqa: B006 max_request_body_size="medium", # type: str socket_options=None, # type: Optional[List[Tuple[int, int, int | bytes]]] + keep_alive=False, # type: bool before_send=None, # type: Optional[EventProcessor] before_breadcrumb=None, # type: Optional[BreadcrumbProcessor] debug=None, # type: Optional[bool] diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index b924ae502a..9ea9cd0c98 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -2,6 +2,7 @@ import io import gzip +import socket import time from datetime import timedelta from collections import defaultdict @@ -21,6 +22,7 @@ from typing import Callable from typing import Dict from typing import Iterable + from typing import List from typing import Optional from typing import Tuple from typing import Type @@ -40,6 +42,21 @@ from urllib import getproxies # type: ignore +KEEP_ALIVE_SOCKET_OPTIONS = [] +for option in [ + (socket.SOL_SOCKET, lambda: getattr(socket, "SO_KEEPALIVE"), 1), # noqa: B009 + (socket.SOL_TCP, lambda: getattr(socket, "TCP_KEEPIDLE"), 45), # noqa: B009 + (socket.SOL_TCP, lambda: getattr(socket, "TCP_KEEPINTVL"), 10), # noqa: B009 + (socket.SOL_TCP, lambda: getattr(socket, "TCP_KEEPCNT"), 6), # noqa: B009 +]: + try: + KEEP_ALIVE_SOCKET_OPTIONS.append((option[0], option[1](), option[2])) + except AttributeError: + # a specific option might not be available on specific systems, + # e.g. TCP_KEEPIDLE doesn't exist on macOS + pass + + class Transport(object): """Baseclass for all transports. 
@@ -446,8 +463,22 @@ def _get_pool_options(self, ca_certs): "ca_certs": ca_certs or certifi.where(), } - if self.options["socket_options"]: - options["socket_options"] = self.options["socket_options"] + socket_options = None # type: Optional[List[Tuple[int, int, int | bytes]]] + + if self.options["socket_options"] is not None: + socket_options = self.options["socket_options"] + + if self.options["keep_alive"]: + if socket_options is None: + socket_options = [] + + used_options = {(o[0], o[1]) for o in socket_options} + for default_option in KEEP_ALIVE_SOCKET_OPTIONS: + if (default_option[0], default_option[1]) not in used_options: + socket_options.append(default_option) + + if socket_options is not None: + options["socket_options"] = socket_options return options diff --git a/tests/test_transport.py b/tests/test_transport.py index aa471b9081..c1f70b0108 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -13,7 +13,7 @@ from sentry_sdk import Hub, Client, add_breadcrumb, capture_message, Scope from sentry_sdk._compat import datetime_utcnow -from sentry_sdk.transport import _parse_rate_limits +from sentry_sdk.transport import KEEP_ALIVE_SOCKET_OPTIONS, _parse_rate_limits from sentry_sdk.envelope import Envelope, parse_json from sentry_sdk.integrations.logging import LoggingIntegration @@ -167,6 +167,66 @@ def test_socket_options(make_client): assert options["socket_options"] == socket_options +def test_keep_alive_true(make_client): + client = make_client(keep_alive=True) + + options = client.transport._get_pool_options([]) + assert options["socket_options"] == KEEP_ALIVE_SOCKET_OPTIONS + + +def test_keep_alive_off_by_default(make_client): + client = make_client() + options = client.transport._get_pool_options([]) + assert "socket_options" not in options + + +def test_socket_options_override_keep_alive(make_client): + socket_options = [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10), + (socket.SOL_TCP, socket.TCP_KEEPCNT, 6), + ] + + client = make_client(socket_options=socket_options, keep_alive=False) + + options = client.transport._get_pool_options([]) + assert options["socket_options"] == socket_options + + +def test_socket_options_merge_with_keep_alive(make_client): + socket_options = [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 42), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 42), + ] + + client = make_client(socket_options=socket_options, keep_alive=True) + + options = client.transport._get_pool_options([]) + try: + assert options["socket_options"] == [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 42), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 42), + (socket.SOL_TCP, socket.TCP_KEEPIDLE, 45), + (socket.SOL_TCP, socket.TCP_KEEPCNT, 6), + ] + except AttributeError: + assert options["socket_options"] == [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 42), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 42), + (socket.SOL_TCP, socket.TCP_KEEPCNT, 6), + ] + + +def test_socket_options_override_defaults(make_client): + # If socket_options are set to [], this doesn't mean the user doesn't want + # any custom socket_options, but rather that they want to disable the urllib3 + # socket option defaults, so we need to set this and not ignore it. 
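+    # (keep_alive is left at its default of False here, so the empty list
+    # should be passed through to urllib3 unchanged.)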
+ client = make_client(socket_options=[]) + + options = client.transport._get_pool_options([]) + assert options["socket_options"] == [] + + def test_transport_infinite_loop(capturing_server, request, make_client): client = make_client( debug=True, From 2020ecac89aaf5f0005c5a264da1b33a5d9857f0 Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Wed, 20 Mar 2024 10:18:39 +0000 Subject: [PATCH 30/37] release: 1.43.0 --- CHANGELOG.md | 12 ++++++++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84708cd6ae..5d53de6f43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 1.43.0 + +### Various fixes & improvements + +- feat: Add optional `keep_alive` (#2842) by @sentrivana +- fix(awslambda): aws_event can be an empty list (#2849) by @sentrivana +- build(deps): bump checkouts/data-schemas from `ed078ed` to `8232f17` (#2832) by @dependabot +- feat(integrations): Add support for celery-redbeat cron tasks (#2643) by @kwigley +- Small APIdocs improvement (#2828) by @antonpirker +- Re-export `Event` in `types.py` (#2829) by @szokeasaurusrex +- Fixed OpenAI tests (#2834) by @antonpirker + ## 1.42.0 ### Various fixes & improvements diff --git a/docs/conf.py b/docs/conf.py index 48bf8dc82e..2cd901f5fa 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "1.42.0" +release = "1.43.0" version = ".".join(release.split(".")[:2]) # The short X.Y version. diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 6af08b4a40..738ca2e1c0 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -321,4 +321,4 @@ def _get_default_options(): del _get_default_options -VERSION = "1.42.0" +VERSION = "1.43.0" diff --git a/setup.py b/setup.py index b1e9956ada..9f4155cad4 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_file_text(file_name): setup( name="sentry-sdk", - version="1.42.0", + version="1.43.0", author="Sentry Team and Contributors", author_email="hello@sentry.io", url="https://github.com/getsentry/sentry-python", From 970c57790c1b8b35e2404e12316028d047ce02dd Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 20 Mar 2024 11:24:54 +0100 Subject: [PATCH 31/37] Update CHANGELOG.md --- CHANGELOG.md | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d53de6f43..86a849d203 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,32 @@ ### Various fixes & improvements -- feat: Add optional `keep_alive` (#2842) by @sentrivana -- fix(awslambda): aws_event can be an empty list (#2849) by @sentrivana -- build(deps): bump checkouts/data-schemas from `ed078ed` to `8232f17` (#2832) by @dependabot -- feat(integrations): Add support for celery-redbeat cron tasks (#2643) by @kwigley -- Small APIdocs improvement (#2828) by @antonpirker +- Add optional `keep_alive` (#2842) by @sentrivana + + If you're experiencing frequent network issues between the SDK and Sentry, + you can try turning on TCP keep-alive: + + ```python + import sentry_sdk + + sentry_sdk.init( + # ...your usual settings... + keep_alive=True, + ) + ``` + +- Add support for Celery Redbeat cron tasks (#2643) by @kwigley + + The SDK now supports the Redbeat scheduler in addition to the default + Celery Beat scheduler for auto instrumenting crons. 
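+
+  For example, enabling RedBeat monitoring uses the same `monitor_beat_tasks`
+  flag as the regular Celery Beat scheduler (a minimal sketch, assuming your
+  Celery app is already configured to use RedBeat):
+
+  ```python
+  import sentry_sdk
+  from sentry_sdk.integrations.celery import CeleryIntegration
+
+  sentry_sdk.init(
+      # ...your usual settings...
+      integrations=[CeleryIntegration(monitor_beat_tasks=True)],
+  )
+  ```
+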
See + [the docs](https://docs.sentry.io/platforms/python/integrations/celery/crons/) + for more information about how to set this up. + +- `aws_event` can be an empty list (#2849) by @sentrivana - Re-export `Event` in `types.py` (#2829) by @szokeasaurusrex +- Small API docs improvement (#2828) by @antonpirker - Fixed OpenAI tests (#2834) by @antonpirker +- Bump `checkouts/data-schemas` from `ed078ed` to `8232f17` (#2832) by @dependabot ## 1.42.0 From 48d77672a4e576de568f76ca7c64ca0d63b9d5fd Mon Sep 17 00:00:00 2001 From: Tony Xiao Date: Wed, 20 Mar 2024 14:24:32 -0400 Subject: [PATCH 32/37] feat(profiling): Add thread data to spans (#2843) As per getsentry/rfc#75, this adds the thread data to the spans. This will be needed for the continuous profiling mode in #2830. --- sentry_sdk/consts.py | 12 ++ sentry_sdk/profiler.py | 70 +-------- sentry_sdk/tracing.py | 19 ++- sentry_sdk/utils.py | 56 +++++++ tests/conftest.py | 12 ++ tests/integrations/aiohttp/test_aiohttp.py | 21 +-- tests/integrations/asyncpg/test_asyncpg.py | 17 ++- tests/integrations/boto3/test_s3.py | 29 ++-- tests/integrations/celery/test_celery.py | 2 + .../test_clickhouse_driver.py | 25 +++ tests/integrations/django/test_basic.py | 12 +- tests/integrations/grpc/test_grpc.py | 35 +++-- tests/integrations/grpc/test_grpc_aio.py | 23 +-- tests/integrations/httpx/test_httpx.py | 39 +++-- .../redis/asyncio/test_redis_asyncio.py | 27 ++-- .../redis/cluster/test_redis_cluster.py | 37 +++-- .../test_redis_cluster_asyncio.py | 51 ++++--- .../rediscluster/test_rediscluster.py | 73 +++++---- tests/integrations/requests/test_requests.py | 37 +++-- tests/integrations/socket/test_socket.py | 33 ++-- tests/integrations/stdlib/test_httplib.py | 58 +++---- tests/integrations/stdlib/test_subprocess.py | 3 +- .../strawberry/test_strawberry_py3.py | 57 ++++--- tests/test_profiler.py | 70 --------- tests/test_scrubber.py | 5 +- tests/test_utils.py | 143 ++++++++++++++++++ 26 files changed, 599 insertions(+), 367 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 738ca2e1c0..0f3b5e9f94 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -191,6 +191,18 @@ class SPANDATA: Example: "http.handler" """ + THREAD_ID = "thread.id" + """ + Identifier of a thread from where the span originated. This should be a string. + Example: "7972576320" + """ + + THREAD_NAME = "thread.name" + """ + Label identifying a thread from where the span originated. This should be a string. 
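+    The SDK populates this from the originating thread's name, when available.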
+ Example: "MainThread" + """ + class OP: CACHE_GET_ITEM = "cache.get_item" diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index ef4868f745..4fa3e481ae 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -42,6 +42,8 @@ from sentry_sdk.utils import ( capture_internal_exception, filename_for_module, + get_current_thread_meta, + is_gevent, is_valid_sample_rate, logger, nanosecond_time, @@ -126,32 +128,16 @@ try: - from gevent import get_hub as get_gevent_hub # type: ignore - from gevent.monkey import get_original, is_module_patched # type: ignore + from gevent.monkey import get_original # type: ignore from gevent.threadpool import ThreadPool # type: ignore thread_sleep = get_original("time", "sleep") except ImportError: - - def get_gevent_hub(): - # type: () -> Any - return None - thread_sleep = time.sleep - def is_module_patched(*args, **kwargs): - # type: (*Any, **Any) -> bool - # unable to import from gevent means no modules have been patched - return False - ThreadPool = None -def is_gevent(): - # type: () -> bool - return is_module_patched("threading") or is_module_patched("_thread") - - _scheduler = None # type: Optional[Scheduler] # The default sampling frequency to use. This is set at 101 in order to @@ -389,52 +375,6 @@ def get_frame_name(frame): MAX_PROFILE_DURATION_NS = int(3e10) # 30 seconds -def get_current_thread_id(thread=None): - # type: (Optional[threading.Thread]) -> Optional[int] - """ - Try to get the id of the current thread, with various fall backs. - """ - - # if a thread is specified, that takes priority - if thread is not None: - try: - thread_id = thread.ident - if thread_id is not None: - return thread_id - except AttributeError: - pass - - # if the app is using gevent, we should look at the gevent hub first - # as the id there differs from what the threading module reports - if is_gevent(): - gevent_hub = get_gevent_hub() - if gevent_hub is not None: - try: - # this is undocumented, so wrap it in try except to be safe - return gevent_hub.thread_ident - except AttributeError: - pass - - # use the current thread's id if possible - try: - current_thread_id = threading.current_thread().ident - if current_thread_id is not None: - return current_thread_id - except AttributeError: - pass - - # if we can't get the current thread id, fall back to the main thread id - try: - main_thread_id = threading.main_thread().ident - if main_thread_id is not None: - return main_thread_id - except AttributeError: - pass - - # we've tried everything, time to give up - return None - - class Profile(object): def __init__( self, @@ -456,7 +396,7 @@ def __init__( # Various framework integrations are capable of overwriting the active thread id. # If it is set to `None` at the end of the profile, we fall back to the default. 
- self._default_active_thread_id = get_current_thread_id() or 0 # type: int + self._default_active_thread_id = get_current_thread_meta()[0] or 0 # type: int self.active_thread_id = None # type: Optional[int] try: @@ -479,7 +419,7 @@ def __init__( def update_active_thread_id(self): # type: () -> None - self.active_thread_id = get_current_thread_id() + self.active_thread_id = get_current_thread_meta()[0] logger.debug( "[Profiling] updating active thread id to {tid}".format( tid=self.active_thread_id diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index bac1ceaa60..7afe7e0944 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -5,7 +5,12 @@ import sentry_sdk from sentry_sdk.consts import INSTRUMENTER -from sentry_sdk.utils import is_valid_sample_rate, logger, nanosecond_time +from sentry_sdk.utils import ( + get_current_thread_meta, + is_valid_sample_rate, + logger, + nanosecond_time, +) from sentry_sdk._compat import datetime_utcnow, utc_from_timestamp, PY2 from sentry_sdk.consts import SPANDATA from sentry_sdk._types import TYPE_CHECKING @@ -172,6 +177,9 @@ def __init__( self._span_recorder = None # type: Optional[_SpanRecorder] self._local_aggregator = None # type: Optional[LocalAggregator] + thread_id, thread_name = get_current_thread_meta() + self.set_thread(thread_id, thread_name) + # TODO this should really live on the Transaction class rather than the Span # class def init_span_recorder(self, maxlen): @@ -418,6 +426,15 @@ def set_status(self, value): # type: (str) -> None self.status = value + def set_thread(self, thread_id, thread_name): + # type: (Optional[int], Optional[str]) -> None + + if thread_id is not None: + self.set_data(SPANDATA.THREAD_ID, str(thread_id)) + + if thread_name is not None: + self.set_data(SPANDATA.THREAD_NAME, thread_name) + def set_http_status(self, http_status): # type: (int) -> None self.set_tag( diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 150130a057..a64b4b4d98 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1746,9 +1746,14 @@ def now(): try: + from gevent import get_hub as get_gevent_hub from gevent.monkey import is_module_patched except ImportError: + def get_gevent_hub(): + # type: () -> Any + return None + def is_module_patched(*args, **kwargs): # type: (*Any, **Any) -> bool # unable to import from gevent means no modules have been patched @@ -1758,3 +1763,54 @@ def is_module_patched(*args, **kwargs): def is_gevent(): # type: () -> bool return is_module_patched("threading") or is_module_patched("_thread") + + +def get_current_thread_meta(thread=None): + # type: (Optional[threading.Thread]) -> Tuple[Optional[int], Optional[str]] + """ + Try to get the id of the current thread, with various fall backs. 
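+    Returns a (thread id, thread name) tuple; either element may be None if it
+    cannot be determined.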
+ """ + + # if a thread is specified, that takes priority + if thread is not None: + try: + thread_id = thread.ident + thread_name = thread.name + if thread_id is not None: + return thread_id, thread_name + except AttributeError: + pass + + # if the app is using gevent, we should look at the gevent hub first + # as the id there differs from what the threading module reports + if is_gevent(): + gevent_hub = get_gevent_hub() + if gevent_hub is not None: + try: + # this is undocumented, so wrap it in try except to be safe + return gevent_hub.thread_ident, None + except AttributeError: + pass + + # use the current thread's id if possible + try: + thread = threading.current_thread() + thread_id = thread.ident + thread_name = thread.name + if thread_id is not None: + return thread_id, thread_name + except AttributeError: + pass + + # if we can't get the current thread id, fall back to the main thread id + try: + thread = threading.main_thread() + thread_id = thread.ident + thread_name = thread.name + if thread_id is not None: + return thread_id, thread_name + except AttributeError: + pass + + # we've tried everything, time to give up + return None, None diff --git a/tests/conftest.py b/tests/conftest.py index 85c65462cb..c87111cbf7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -652,3 +652,15 @@ def patch_start_tracing_child(fake_transaction_is_none=False): return_value=fake_transaction, ): yield fake_start_child + + +class ApproxDict(dict): + def __eq__(self, other): + # For an ApproxDict to equal another dict, the other dict just needs to contain + # all the keys from the ApproxDict with the same values. + # + # The other dict may contain additional keys with any value. + return all(key in other and other[key] == value for key, value in self.items()) + + def __ne__(self, other): + return not self.__eq__(other) diff --git a/tests/integrations/aiohttp/test_aiohttp.py b/tests/integrations/aiohttp/test_aiohttp.py index de5cf19f44..90ca466175 100644 --- a/tests/integrations/aiohttp/test_aiohttp.py +++ b/tests/integrations/aiohttp/test_aiohttp.py @@ -9,6 +9,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk.integrations.aiohttp import AioHttpIntegration +from tests.conftest import ApproxDict try: from unittest import mock # python 3.3 and above @@ -495,15 +496,17 @@ async def handler(request): crumb = event["breadcrumbs"]["values"][0] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": "http://127.0.0.1:{}/".format(raw_server.port), - "http.fragment": "", - "http.method": "GET", - "http.query": "", - "http.response.status_code": 200, - "reason": "OK", - "extra": "foo", - } + assert crumb["data"] == ApproxDict( + { + "url": "http://127.0.0.1:{}/".format(raw_server.port), + "http.fragment": "", + "http.method": "GET", + "http.query": "", + "http.response.status_code": 200, + "reason": "OK", + "extra": "foo", + } + ) @pytest.mark.asyncio diff --git a/tests/integrations/asyncpg/test_asyncpg.py b/tests/integrations/asyncpg/test_asyncpg.py index a839031c3b..611d8ea9d9 100644 --- a/tests/integrations/asyncpg/test_asyncpg.py +++ b/tests/integrations/asyncpg/test_asyncpg.py @@ -34,6 +34,7 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.tracing_utils import record_sql_queries from sentry_sdk._compat import contextmanager +from tests.conftest import ApproxDict try: from unittest import mock @@ -46,13 +47,15 @@ ) CRUMBS_CONNECT = { "category": "query", - "data": { - "db.name": PG_NAME, - "db.system": 
"postgresql", - "db.user": PG_USER, - "server.address": PG_HOST, - "server.port": PG_PORT, - }, + "data": ApproxDict( + { + "db.name": PG_NAME, + "db.system": "postgresql", + "db.user": PG_USER, + "server.address": PG_HOST, + "server.port": PG_PORT, + } + ), "message": "connect", "type": "default", } diff --git a/tests/integrations/boto3/test_s3.py b/tests/integrations/boto3/test_s3.py index 5812c2c1bb..8c05b72a3e 100644 --- a/tests/integrations/boto3/test_s3.py +++ b/tests/integrations/boto3/test_s3.py @@ -4,6 +4,7 @@ from sentry_sdk import Hub from sentry_sdk.integrations.boto3 import Boto3Integration +from tests.conftest import ApproxDict from tests.integrations.boto3.aws_mock import MockResponse from tests.integrations.boto3 import read_fixture @@ -65,12 +66,14 @@ def test_streaming(sentry_init, capture_events): span1 = event["spans"][0] assert span1["op"] == "http.client" assert span1["description"] == "aws.s3.GetObject" - assert span1["data"] == { - "http.method": "GET", - "aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf", - "http.fragment": "", - "http.query": "", - } + assert span1["data"] == ApproxDict( + { + "http.method": "GET", + "aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf", + "http.fragment": "", + "http.query": "", + } + ) span2 = event["spans"][1] assert span2["op"] == "http.client.stream" @@ -123,7 +126,13 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): transaction.finish() (event,) = events - assert event["spans"][0]["data"] == { - "http.method": "GET", - # no url data - } + assert event["spans"][0]["data"] == ApproxDict( + { + "http.method": "GET", + # no url data + } + ) + + assert "aws.request.url" not in event["spans"][0]["data"] + assert "http.fragment" not in event["spans"][0]["data"] + assert "http.query" not in event["spans"][0]["data"] diff --git a/tests/integrations/celery/test_celery.py b/tests/integrations/celery/test_celery.py index 0d44ee992e..c6eb55536c 100644 --- a/tests/integrations/celery/test_celery.py +++ b/tests/integrations/celery/test_celery.py @@ -10,6 +10,7 @@ ) from sentry_sdk._compat import text_type +from tests.conftest import ApproxDict from celery import Celery, VERSION from celery.bin import worker @@ -218,6 +219,7 @@ def dummy_task(x, y): assert execution_event["spans"] == [] assert submission_event["spans"] == [ { + "data": ApproxDict(), "description": "dummy_task", "op": "queue.submit.celery", "parent_span_id": submission_event["contexts"]["trace"]["span_id"], diff --git a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py index 74a04fac44..b39f722c52 100644 --- a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py +++ b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py @@ -10,6 +10,7 @@ from sentry_sdk import start_transaction, capture_message from sentry_sdk.integrations.clickhouse_driver import ClickhouseDriverIntegration +from tests.conftest import ApproxDict EXPECT_PARAMS_IN_SELECT = True if clickhouse_driver.VERSION < (0, 2, 6): @@ -102,6 +103,9 @@ def test_clickhouse_client_breadcrumbs(sentry_init, capture_events) -> None: if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -201,6 +205,9 @@ def test_clickhouse_client_breadcrumbs_with_pii(sentry_init, capture_events) -> if not 
EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -313,6 +320,9 @@ def test_clickhouse_client_spans( if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) @@ -434,6 +444,9 @@ def test_clickhouse_client_spans_with_pii( if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) @@ -529,6 +542,9 @@ def test_clickhouse_dbapi_breadcrumbs(sentry_init, capture_events) -> None: if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -629,6 +645,9 @@ def test_clickhouse_dbapi_breadcrumbs_with_pii(sentry_init, capture_events) -> N if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -739,6 +758,9 @@ def test_clickhouse_dbapi_spans(sentry_init, capture_events, capture_envelopes) if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) @@ -860,6 +882,9 @@ def test_clickhouse_dbapi_spans_with_pii( if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) diff --git a/tests/integrations/django/test_basic.py b/tests/integrations/django/test_basic.py index 095657fd8a..8c01c71830 100644 --- a/tests/integrations/django/test_basic.py +++ b/tests/integrations/django/test_basic.py @@ -27,7 +27,7 @@ from sentry_sdk.integrations.django.caching import _get_span_description from sentry_sdk.integrations.executing import ExecutingIntegration from sentry_sdk.tracing import Span -from tests.conftest import unpack_werkzeug_response +from tests.conftest import ApproxDict, unpack_werkzeug_response from tests.integrations.django.myapp.wsgi import application from tests.integrations.django.utils import pytest_mark_django_db_decorator @@ -1237,14 +1237,14 @@ def test_cache_spans_middleware( assert first_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert first_event["spans"][0]["data"] == {"cache.hit": False} + assert first_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert len(second_event["spans"]) == 2 assert second_event["spans"][0]["op"] == "cache.get_item" assert second_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." 
) - assert second_event["spans"][0]["data"] == {"cache.hit": False} + assert second_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert second_event["spans"][1]["op"] == "cache.get_item" assert second_event["spans"][1]["description"].startswith( @@ -1279,14 +1279,14 @@ def test_cache_spans_decorator(sentry_init, client, capture_events, use_django_c assert first_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert first_event["spans"][0]["data"] == {"cache.hit": False} + assert first_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert len(second_event["spans"]) == 2 assert second_event["spans"][0]["op"] == "cache.get_item" assert second_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert second_event["spans"][0]["data"] == {"cache.hit": False} + assert second_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert second_event["spans"][1]["op"] == "cache.get_item" assert second_event["spans"][1]["description"].startswith( @@ -1323,7 +1323,7 @@ def test_cache_spans_templatetag( assert first_event["spans"][0]["description"].startswith( "get template.cache.some_identifier." ) - assert first_event["spans"][0]["data"] == {"cache.hit": False} + assert first_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert len(second_event["spans"]) == 1 assert second_event["spans"][0]["op"] == "cache.get_item" diff --git a/tests/integrations/grpc/test_grpc.py b/tests/integrations/grpc/test_grpc.py index 0813d655ae..3f49c0a0f4 100644 --- a/tests/integrations/grpc/test_grpc.py +++ b/tests/integrations/grpc/test_grpc.py @@ -11,6 +11,7 @@ from sentry_sdk import Hub, start_transaction from sentry_sdk.consts import OP from sentry_sdk.integrations.grpc import GRPCIntegration +from tests.conftest import ApproxDict from tests.integrations.grpc.grpc_test_service_pb2 import gRPCTestMessage from tests.integrations.grpc.grpc_test_service_pb2_grpc import ( gRPCTestServiceServicer, @@ -151,11 +152,13 @@ def test_grpc_client_starts_span(sentry_init, capture_events_forksafe): span["description"] == "unary unary call to /grpc_test_server.gRPCTestService/TestServe" ) - assert span["data"] == { - "type": "unary unary", - "method": "/grpc_test_server.gRPCTestService/TestServe", - "code": "OK", - } + assert span["data"] == ApproxDict( + { + "type": "unary unary", + "method": "/grpc_test_server.gRPCTestService/TestServe", + "code": "OK", + } + ) @pytest.mark.forked @@ -183,10 +186,12 @@ def test_grpc_client_unary_stream_starts_span(sentry_init, capture_events_forksa span["description"] == "unary stream call to /grpc_test_server.gRPCTestService/TestUnaryStream" ) - assert span["data"] == { - "type": "unary stream", - "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", - } + assert span["data"] == ApproxDict( + { + "type": "unary stream", + "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", + } + ) # using unittest.mock.Mock not possible because grpc verifies @@ -229,11 +234,13 @@ def test_grpc_client_other_interceptor(sentry_init, capture_events_forksafe): span["description"] == "unary unary call to /grpc_test_server.gRPCTestService/TestServe" ) - assert span["data"] == { - "type": "unary unary", - "method": "/grpc_test_server.gRPCTestService/TestServe", - "code": "OK", - } + assert span["data"] == ApproxDict( + { + "type": "unary unary", + "method": "/grpc_test_server.gRPCTestService/TestServe", + "code": "OK", + } + ) @pytest.mark.forked diff --git 
a/tests/integrations/grpc/test_grpc_aio.py b/tests/integrations/grpc/test_grpc_aio.py index 0b8571adca..3e21188ec8 100644 --- a/tests/integrations/grpc/test_grpc_aio.py +++ b/tests/integrations/grpc/test_grpc_aio.py @@ -11,6 +11,7 @@ from sentry_sdk import Hub, start_transaction from sentry_sdk.consts import OP from sentry_sdk.integrations.grpc import GRPCIntegration +from tests.conftest import ApproxDict from tests.integrations.grpc.grpc_test_service_pb2 import gRPCTestMessage from tests.integrations.grpc.grpc_test_service_pb2_grpc import ( gRPCTestServiceServicer, @@ -161,11 +162,13 @@ async def test_grpc_client_starts_span( span["description"] == "unary unary call to /grpc_test_server.gRPCTestService/TestServe" ) - assert span["data"] == { - "type": "unary unary", - "method": "/grpc_test_server.gRPCTestService/TestServe", - "code": "OK", - } + assert span["data"] == ApproxDict( + { + "type": "unary unary", + "method": "/grpc_test_server.gRPCTestService/TestServe", + "code": "OK", + } + ) @pytest.mark.asyncio @@ -190,10 +193,12 @@ async def test_grpc_client_unary_stream_starts_span( span["description"] == "unary stream call to /grpc_test_server.gRPCTestService/TestUnaryStream" ) - assert span["data"] == { - "type": "unary stream", - "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", - } + assert span["data"] == ApproxDict( + { + "type": "unary stream", + "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", + } + ) @pytest.mark.asyncio diff --git a/tests/integrations/httpx/test_httpx.py b/tests/integrations/httpx/test_httpx.py index e141faa282..c4ca97321c 100644 --- a/tests/integrations/httpx/test_httpx.py +++ b/tests/integrations/httpx/test_httpx.py @@ -7,6 +7,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk.consts import MATCH_ALL, SPANDATA from sentry_sdk.integrations.httpx import HttpxIntegration +from tests.conftest import ApproxDict try: from unittest import mock # python 3.3 and above @@ -46,15 +47,17 @@ def before_breadcrumb(crumb, hint): crumb = event["breadcrumbs"]["values"][0] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - "extra": "foo", - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + "extra": "foo", + } + ) @pytest.mark.parametrize( @@ -291,9 +294,15 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): capture_message("Testing!") (event,) = events - assert event["breadcrumbs"]["values"][0]["data"] == { - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - # no url related data - } + assert event["breadcrumbs"]["values"][0]["data"] == ApproxDict( + { + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + # no url related data + } + ) + + assert "url" not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_FRAGMENT not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_QUERY not in event["breadcrumbs"]["values"][0]["data"] diff --git a/tests/integrations/redis/asyncio/test_redis_asyncio.py b/tests/integrations/redis/asyncio/test_redis_asyncio.py index 7233b8f908..4f024a2824 100644 --- a/tests/integrations/redis/asyncio/test_redis_asyncio.py +++ 
b/tests/integrations/redis/asyncio/test_redis_asyncio.py @@ -3,6 +3,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.redis import RedisIntegration +from tests.conftest import ApproxDict from fakeredis.aioredis import FakeRedis @@ -64,18 +65,20 @@ async def test_async_redis_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 3, - "first_ten": expected_first_ten, - }, - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "0", - SPANDATA.SERVER_ADDRESS: connection.connection_pool.connection_kwargs.get( - "host" - ), - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 3, + "first_ten": expected_first_ten, + }, + SPANDATA.DB_SYSTEM: "redis", + SPANDATA.DB_NAME: "0", + SPANDATA.SERVER_ADDRESS: connection.connection_pool.connection_kwargs.get( + "host" + ), + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "redis.transaction": is_transaction, "redis.is_cluster": False, diff --git a/tests/integrations/redis/cluster/test_redis_cluster.py b/tests/integrations/redis/cluster/test_redis_cluster.py index 1e1e59e254..a16d66588c 100644 --- a/tests/integrations/redis/cluster/test_redis_cluster.py +++ b/tests/integrations/redis/cluster/test_redis_cluster.py @@ -3,6 +3,7 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.api import start_transaction from sentry_sdk.integrations.redis import RedisIntegration +from tests.conftest import ApproxDict import redis @@ -82,12 +83,14 @@ def test_rediscluster_basic(sentry_init, capture_events, send_default_pii, descr span = spans[-1] assert span["op"] == "db.redis" assert span["description"] == description - assert span["data"] == { - SPANDATA.DB_SYSTEM: "redis", - # ClusterNode converts localhost to 127.0.0.1 - SPANDATA.SERVER_ADDRESS: "127.0.0.1", - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + SPANDATA.DB_SYSTEM: "redis", + # ClusterNode converts localhost to 127.0.0.1 + SPANDATA.SERVER_ADDRESS: "127.0.0.1", + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "db.operation": "SET", "redis.command": "SET", @@ -125,16 +128,18 @@ def test_rediscluster_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 3, - "first_ten": expected_first_ten, - }, - SPANDATA.DB_SYSTEM: "redis", - # ClusterNode converts localhost to 127.0.0.1 - SPANDATA.SERVER_ADDRESS: "127.0.0.1", - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 3, + "first_ten": expected_first_ten, + }, + SPANDATA.DB_SYSTEM: "redis", + # ClusterNode converts localhost to 127.0.0.1 + SPANDATA.SERVER_ADDRESS: "127.0.0.1", + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "redis.transaction": False, # For Cluster, this is always False "redis.is_cluster": True, diff --git a/tests/integrations/redis/cluster_asyncio/test_redis_cluster_asyncio.py b/tests/integrations/redis/cluster_asyncio/test_redis_cluster_asyncio.py index ad78b79e27..a6d8962afe 100644 --- a/tests/integrations/redis/cluster_asyncio/test_redis_cluster_asyncio.py +++ b/tests/integrations/redis/cluster_asyncio/test_redis_cluster_asyncio.py @@ -3,6 +3,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk.consts import 
SPANDATA from sentry_sdk.integrations.redis import RedisIntegration +from tests.conftest import ApproxDict from redis.asyncio import cluster @@ -47,12 +48,14 @@ async def test_async_breadcrumb(sentry_init, capture_events): assert crumb == { "category": "redis", "message": "GET 'foobar'", - "data": { - "db.operation": "GET", - "redis.key": "foobar", - "redis.command": "GET", - "redis.is_cluster": True, - }, + "data": ApproxDict( + { + "db.operation": "GET", + "redis.key": "foobar", + "redis.command": "GET", + "redis.is_cluster": True, + } + ), "timestamp": crumb["timestamp"], "type": "redis", } @@ -82,12 +85,14 @@ async def test_async_basic(sentry_init, capture_events, send_default_pii, descri (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == description - assert span["data"] == { - SPANDATA.DB_SYSTEM: "redis", - # ClusterNode converts localhost to 127.0.0.1 - SPANDATA.SERVER_ADDRESS: "127.0.0.1", - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + SPANDATA.DB_SYSTEM: "redis", + # ClusterNode converts localhost to 127.0.0.1 + SPANDATA.SERVER_ADDRESS: "127.0.0.1", + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "redis.is_cluster": True, "db.operation": "SET", @@ -126,16 +131,18 @@ async def test_async_redis_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 3, - "first_ten": expected_first_ten, - }, - SPANDATA.DB_SYSTEM: "redis", - # ClusterNode converts localhost to 127.0.0.1 - SPANDATA.SERVER_ADDRESS: "127.0.0.1", - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 3, + "first_ten": expected_first_ten, + }, + SPANDATA.DB_SYSTEM: "redis", + # ClusterNode converts localhost to 127.0.0.1 + SPANDATA.SERVER_ADDRESS: "127.0.0.1", + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "redis.transaction": False, "redis.is_cluster": True, diff --git a/tests/integrations/rediscluster/test_rediscluster.py b/tests/integrations/rediscluster/test_rediscluster.py index 14d831a647..88f987758b 100644 --- a/tests/integrations/rediscluster/test_rediscluster.py +++ b/tests/integrations/rediscluster/test_rediscluster.py @@ -4,6 +4,7 @@ from sentry_sdk.api import start_transaction from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.redis import RedisIntegration +from tests.conftest import ApproxDict try: from unittest import mock @@ -56,12 +57,14 @@ def test_rediscluster_basic(rediscluster_cls, sentry_init, capture_events): assert crumb == { "category": "redis", "message": "GET 'foobar'", - "data": { - "db.operation": "GET", - "redis.key": "foobar", - "redis.command": "GET", - "redis.is_cluster": True, - }, + "data": ApproxDict( + { + "db.operation": "GET", + "redis.key": "foobar", + "redis.command": "GET", + "redis.is_cluster": True, + } + ), "timestamp": crumb["timestamp"], "type": "redis", } @@ -96,16 +99,18 @@ def test_rediscluster_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 3, - "first_ten": expected_first_ten, - }, - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "1", - SPANDATA.SERVER_ADDRESS: "localhost", - SPANDATA.SERVER_PORT: 63791, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 3, + "first_ten": expected_first_ten, + }, + SPANDATA.DB_SYSTEM: 
"redis", + SPANDATA.DB_NAME: "1", + SPANDATA.SERVER_ADDRESS: "localhost", + SPANDATA.SERVER_PORT: 63791, + } + ) assert span["tags"] == { "redis.transaction": False, # For Cluster, this is always False "redis.is_cluster": True, @@ -127,12 +132,14 @@ def test_db_connection_attributes_client(sentry_init, capture_events, redisclust (event,) = events (span,) = event["spans"] - assert span["data"] == { - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "1", - SPANDATA.SERVER_ADDRESS: "localhost", - SPANDATA.SERVER_PORT: 63791, - } + assert span["data"] == ApproxDict( + { + SPANDATA.DB_SYSTEM: "redis", + SPANDATA.DB_NAME: "1", + SPANDATA.SERVER_ADDRESS: "localhost", + SPANDATA.SERVER_PORT: 63791, + } + ) @pytest.mark.parametrize("rediscluster_cls", rediscluster_classes) @@ -155,13 +162,15 @@ def test_db_connection_attributes_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 1, - "first_ten": ["GET 'foo'"], - }, - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "1", - SPANDATA.SERVER_ADDRESS: "localhost", - SPANDATA.SERVER_PORT: 63791, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 1, + "first_ten": ["GET 'foo'"], + }, + SPANDATA.DB_SYSTEM: "redis", + SPANDATA.DB_NAME: "1", + SPANDATA.SERVER_ADDRESS: "localhost", + SPANDATA.SERVER_PORT: 63791, + } + ) diff --git a/tests/integrations/requests/test_requests.py b/tests/integrations/requests/test_requests.py index ed5b273712..1f4dd412d7 100644 --- a/tests/integrations/requests/test_requests.py +++ b/tests/integrations/requests/test_requests.py @@ -6,6 +6,7 @@ from sentry_sdk import capture_message from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.stdlib import StdlibIntegration +from tests.conftest import ApproxDict try: from unittest import mock # python 3.3 and above @@ -28,14 +29,16 @@ def test_crumb_capture(sentry_init, capture_events): (crumb,) = event["breadcrumbs"]["values"] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - SPANDATA.HTTP_STATUS_CODE: response.status_code, - "reason": response.reason, - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + SPANDATA.HTTP_STATUS_CODE: response.status_code, + "reason": response.reason, + } + ) @pytest.mark.tests_internal_exceptions @@ -56,9 +59,15 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): capture_message("Testing!") (event,) = events - assert event["breadcrumbs"]["values"][0]["data"] == { - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: response.status_code, - "reason": response.reason, - # no url related data - } + assert event["breadcrumbs"]["values"][0]["data"] == ApproxDict( + { + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: response.status_code, + "reason": response.reason, + # no url related data + } + ) + + assert "url" not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_FRAGMENT not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_QUERY not in event["breadcrumbs"]["values"][0]["data"] diff --git a/tests/integrations/socket/test_socket.py b/tests/integrations/socket/test_socket.py index 914ba0bf84..4f93c1f2a5 100644 --- a/tests/integrations/socket/test_socket.py +++ 
b/tests/integrations/socket/test_socket.py @@ -2,6 +2,7 @@ from sentry_sdk import start_transaction from sentry_sdk.integrations.socket import SocketIntegration +from tests.conftest import ApproxDict def test_getaddrinfo_trace(sentry_init, capture_events): @@ -16,10 +17,12 @@ def test_getaddrinfo_trace(sentry_init, capture_events): assert span["op"] == "socket.dns" assert span["description"] == "example.com:443" - assert span["data"] == { - "host": "example.com", - "port": 443, - } + assert span["data"] == ApproxDict( + { + "host": "example.com", + "port": 443, + } + ) def test_create_connection_trace(sentry_init, capture_events): @@ -37,15 +40,19 @@ def test_create_connection_trace(sentry_init, capture_events): assert connect_span["op"] == "socket.connection" assert connect_span["description"] == "example.com:443" - assert connect_span["data"] == { - "address": ["example.com", 443], - "timeout": timeout, - "source_address": None, - } + assert connect_span["data"] == ApproxDict( + { + "address": ["example.com", 443], + "timeout": timeout, + "source_address": None, + } + ) assert dns_span["op"] == "socket.dns" assert dns_span["description"] == "example.com:443" - assert dns_span["data"] == { - "host": "example.com", - "port": 443, - } + assert dns_span["data"] == ApproxDict( + { + "host": "example.com", + "port": 443, + } + ) diff --git a/tests/integrations/stdlib/test_httplib.py b/tests/integrations/stdlib/test_httplib.py index d50bf42e21..6055b86ab8 100644 --- a/tests/integrations/stdlib/test_httplib.py +++ b/tests/integrations/stdlib/test_httplib.py @@ -27,7 +27,7 @@ from sentry_sdk.tracing import Transaction from sentry_sdk.integrations.stdlib import StdlibIntegration -from tests.conftest import create_mock_http_server +from tests.conftest import ApproxDict, create_mock_http_server PORT = create_mock_http_server() @@ -46,14 +46,16 @@ def test_crumb_capture(sentry_init, capture_events): assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + } + ) def test_crumb_capture_hint(sentry_init, capture_events): @@ -73,15 +75,17 @@ def before_breadcrumb(crumb, hint): (crumb,) = event["breadcrumbs"]["values"] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - "extra": "foo", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + "extra": "foo", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + } + ) def test_empty_realurl(sentry_init): @@ -131,14 +135,16 @@ def test_httplib_misuse(sentry_init, capture_events, request): assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": "http://localhost:{}/200".format(PORT), - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - } + assert crumb["data"] == ApproxDict( + { + "url": "http://localhost:{}/200".format(PORT), + 
SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + } + ) def test_outgoing_trace_headers(sentry_init, monkeypatch): diff --git a/tests/integrations/stdlib/test_subprocess.py b/tests/integrations/stdlib/test_subprocess.py index 31da043ac3..d61be35fd2 100644 --- a/tests/integrations/stdlib/test_subprocess.py +++ b/tests/integrations/stdlib/test_subprocess.py @@ -8,6 +8,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk._compat import PY2 from sentry_sdk.integrations.stdlib import StdlibIntegration +from tests.conftest import ApproxDict if PY2: @@ -125,7 +126,7 @@ def test_subprocess_basic( assert message_event["message"] == "hi" - data = {"subprocess.cwd": os.getcwd()} if with_cwd else {} + data = ApproxDict({"subprocess.cwd": os.getcwd()} if with_cwd else {}) (crumb,) = message_event["breadcrumbs"]["values"] assert crumb == { diff --git a/tests/integrations/strawberry/test_strawberry_py3.py b/tests/integrations/strawberry/test_strawberry_py3.py index b357779461..4911a1b5c3 100644 --- a/tests/integrations/strawberry/test_strawberry_py3.py +++ b/tests/integrations/strawberry/test_strawberry_py3.py @@ -25,6 +25,7 @@ SentryAsyncExtension, SentrySyncExtension, ) +from tests.conftest import ApproxDict parameterize_strawberry_test = pytest.mark.parametrize( @@ -351,12 +352,14 @@ def test_capture_transaction_on_error( resolve_span = resolve_spans[0] assert resolve_span["parent_span_id"] == query_span["span_id"] assert resolve_span["description"] == "resolving Query.error" - assert resolve_span["data"] == { - "graphql.field_name": "error", - "graphql.parent_type": "Query", - "graphql.field_path": "Query.error", - "graphql.path": "error", - } + assert resolve_span["data"] == ApproxDict( + { + "graphql.field_name": "error", + "graphql.parent_type": "Query", + "graphql.field_path": "Query.error", + "graphql.path": "error", + } + ) @parameterize_strawberry_test @@ -429,12 +432,14 @@ def test_capture_transaction_on_success( resolve_span = resolve_spans[0] assert resolve_span["parent_span_id"] == query_span["span_id"] assert resolve_span["description"] == "resolving Query.hello" - assert resolve_span["data"] == { - "graphql.field_name": "hello", - "graphql.parent_type": "Query", - "graphql.field_path": "Query.hello", - "graphql.path": "hello", - } + assert resolve_span["data"] == ApproxDict( + { + "graphql.field_name": "hello", + "graphql.parent_type": "Query", + "graphql.field_path": "Query.hello", + "graphql.path": "hello", + } + ) @parameterize_strawberry_test @@ -507,12 +512,14 @@ def test_transaction_no_operation_name( resolve_span = resolve_spans[0] assert resolve_span["parent_span_id"] == query_span["span_id"] assert resolve_span["description"] == "resolving Query.hello" - assert resolve_span["data"] == { - "graphql.field_name": "hello", - "graphql.parent_type": "Query", - "graphql.field_path": "Query.hello", - "graphql.path": "hello", - } + assert resolve_span["data"] == ApproxDict( + { + "graphql.field_name": "hello", + "graphql.parent_type": "Query", + "graphql.field_path": "Query.hello", + "graphql.path": "hello", + } + ) @parameterize_strawberry_test @@ -585,9 +592,11 @@ def test_transaction_mutation( resolve_span = resolve_spans[0] assert resolve_span["parent_span_id"] == query_span["span_id"] assert resolve_span["description"] == "resolving Mutation.change" - assert resolve_span["data"] == { - "graphql.field_name": "change", - "graphql.parent_type": "Mutation", - 
"graphql.field_path": "Mutation.change", - "graphql.path": "change", - } + assert resolve_span["data"] == ApproxDict( + { + "graphql.field_name": "change", + "graphql.parent_type": "Mutation", + "graphql.field_path": "Mutation.change", + "graphql.path": "change", + } + ) diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 94659ff02f..495dd3f300 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -16,13 +16,11 @@ extract_frame, extract_stack, frame_id, - get_current_thread_id, get_frame_name, setup_profiler, ) from sentry_sdk.tracing import Transaction from sentry_sdk._lru_cache import LRUCache -from sentry_sdk._queue import Queue try: from unittest import mock # python 3.3 and above @@ -556,74 +554,6 @@ def test_extract_stack_with_cache(frame, depth): assert frame1 is frame2, i -@requires_python_version(3, 3) -def test_get_current_thread_id_explicit_thread(): - results = Queue(maxsize=1) - - def target1(): - pass - - def target2(): - results.put(get_current_thread_id(thread1)) - - thread1 = threading.Thread(target=target1) - thread1.start() - - thread2 = threading.Thread(target=target2) - thread2.start() - - thread2.join() - thread1.join() - - assert thread1.ident == results.get(timeout=1) - - -@requires_python_version(3, 3) -@requires_gevent -def test_get_current_thread_id_gevent_in_thread(): - results = Queue(maxsize=1) - - def target(): - job = gevent.spawn(get_current_thread_id) - job.join() - results.put(job.value) - - thread = threading.Thread(target=target) - thread.start() - thread.join() - assert thread.ident == results.get(timeout=1) - - -@requires_python_version(3, 3) -def test_get_current_thread_id_running_thread(): - results = Queue(maxsize=1) - - def target(): - results.put(get_current_thread_id()) - - thread = threading.Thread(target=target) - thread.start() - thread.join() - assert thread.ident == results.get(timeout=1) - - -@requires_python_version(3, 3) -def test_get_current_thread_id_main_thread(): - results = Queue(maxsize=1) - - def target(): - # mock that somehow the current thread doesn't exist - with mock.patch("threading.current_thread", side_effect=[None]): - results.put(get_current_thread_id()) - - thread_id = threading.main_thread().ident if sys.version_info >= (3, 4) else None - - thread = threading.Thread(target=target) - thread.start() - thread.join() - assert thread_id == results.get(timeout=1) - - def get_scheduler_threads(scheduler): return [thread for thread in threading.enumerate() if thread.name == scheduler.name] diff --git a/tests/test_scrubber.py b/tests/test_scrubber.py index 126bf158d8..2c4bd3aa90 100644 --- a/tests/test_scrubber.py +++ b/tests/test_scrubber.py @@ -4,6 +4,7 @@ from sentry_sdk import capture_exception, capture_event, start_transaction, start_span from sentry_sdk.utils import event_from_exception from sentry_sdk.scrubber import EventScrubber +from tests.conftest import ApproxDict logger = logging.getLogger(__name__) @@ -121,7 +122,9 @@ def test_span_data_scrubbing(sentry_init, capture_events): span.set_data("datafoo", "databar") (event,) = events - assert event["spans"][0]["data"] == {"password": "[Filtered]", "datafoo": "databar"} + assert event["spans"][0]["data"] == ApproxDict( + {"password": "[Filtered]", "datafoo": "databar"} + ) assert event["_meta"]["spans"] == { "0": {"data": {"password": {"": {"rem": [["!config", "s"]]}}}} } diff --git a/tests/test_utils.py b/tests/test_utils.py index 147064b541..4b8e9087cc 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,12 +1,15 @@ import 
pytest import re import sys +import threading from datetime import timedelta from sentry_sdk._compat import duration_in_milliseconds +from sentry_sdk._queue import Queue from sentry_sdk.utils import ( Components, Dsn, + get_current_thread_meta, get_default_release, get_error_message, get_git_revision, @@ -29,6 +32,11 @@ except ImportError: import mock # python < 3.3 +try: + import gevent +except ImportError: + gevent = None + try: # Python 3 FileNotFoundError @@ -607,3 +615,138 @@ def test_default_release_empty_string(): ) def test_duration_in_milliseconds(timedelta, expected_milliseconds): assert duration_in_milliseconds(timedelta) == expected_milliseconds + + +def test_get_current_thread_meta_explicit_thread(): + results = Queue(maxsize=1) + + def target1(): + pass + + def target2(): + results.put(get_current_thread_meta(thread1)) + + thread1 = threading.Thread(target=target1) + thread1.start() + + thread2 = threading.Thread(target=target2) + thread2.start() + + thread2.join() + thread1.join() + + assert (thread1.ident, thread1.name) == results.get(timeout=1) + + +@pytest.mark.skipif( + sys.version_info < (3, 4), reason="threading.main_thread() Not available" +) +def test_get_current_thread_meta_bad_explicit_thread(): + thread = "fake thread" + + main_thread = threading.main_thread() + + assert (main_thread.ident, main_thread.name) == get_current_thread_meta(thread) + + +@pytest.mark.skipif(gevent is None, reason="gevent not enabled") +def test_get_current_thread_meta_gevent_in_thread(): + results = Queue(maxsize=1) + + def target(): + with mock.patch("sentry_sdk.utils.is_gevent", side_effect=[True]): + job = gevent.spawn(get_current_thread_meta) + job.join() + results.put(job.value) + + thread = threading.Thread(target=target) + thread.start() + thread.join() + assert (thread.ident, None) == results.get(timeout=1) + + +@pytest.mark.skipif(gevent is None, reason="gevent not enabled") +def test_get_current_thread_meta_gevent_in_thread_failed_to_get_hub(): + results = Queue(maxsize=1) + + def target(): + with mock.patch("sentry_sdk.utils.is_gevent", side_effect=[True]): + with mock.patch( + "sentry_sdk.utils.get_gevent_hub", side_effect=["fake hub"] + ): + job = gevent.spawn(get_current_thread_meta) + job.join() + results.put(job.value) + + thread = threading.Thread(target=target) + thread.start() + thread.join() + assert (thread.ident, thread.name) == results.get(timeout=1) + + +def test_get_current_thread_meta_running_thread(): + results = Queue(maxsize=1) + + def target(): + results.put(get_current_thread_meta()) + + thread = threading.Thread(target=target) + thread.start() + thread.join() + assert (thread.ident, thread.name) == results.get(timeout=1) + + +@pytest.mark.skipif( + sys.version_info < (3, 4), reason="threading.main_thread() Not available" +) +def test_get_current_thread_meta_bad_running_thread(): + results = Queue(maxsize=1) + + def target(): + with mock.patch("threading.current_thread", side_effect=["fake thread"]): + results.put(get_current_thread_meta()) + + thread = threading.Thread(target=target) + thread.start() + thread.join() + + main_thread = threading.main_thread() + assert (main_thread.ident, main_thread.name) == results.get(timeout=1) + + +@pytest.mark.skipif( + sys.version_info < (3, 4), reason="threading.main_thread() Not available" +) +def test_get_current_thread_meta_main_thread(): + results = Queue(maxsize=1) + + def target(): + # mock that somehow the current thread doesn't exist + with mock.patch("threading.current_thread", side_effect=[None]): + 
results.put(get_current_thread_meta()) + + main_thread = threading.main_thread() + + thread = threading.Thread(target=target) + thread.start() + thread.join() + assert (main_thread.ident, main_thread.name) == results.get(timeout=1) + + +@pytest.mark.skipif( + sys.version_info < (3, 4), reason="threading.main_thread() Not available" +) +def test_get_current_thread_meta_failed_to_get_main_thread(): + results = Queue(maxsize=1) + + def target(): + with mock.patch("threading.current_thread", side_effect=["fake thread"]): + with mock.patch("threading.current_thread", side_effect=["fake thread"]): + results.put(get_current_thread_meta()) + + main_thread = threading.main_thread() + + thread = threading.Thread(target=target) + thread.start() + thread.join() + assert (main_thread.ident, main_thread.name) == results.get(timeout=1) From 05d1e5ca94cc4fffcd01c46ceda6713459308404 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Mar 2024 09:49:25 +0100 Subject: [PATCH 33/37] build(deps): bump checkouts/data-schemas from `8232f17` to `1e17eb5` (#2901) Bumps [checkouts/data-schemas](https://github.com/getsentry/sentry-data-schemas) from `8232f17` to `1e17eb5`. - [Commits](https://github.com/getsentry/sentry-data-schemas/compare/8232f178ae709232907b783d709f5fba80b26201...1e17eb54727a77681a1b9e845c9a5d55b52d35a1) --- updated-dependencies: - dependency-name: checkouts/data-schemas dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- checkouts/data-schemas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkouts/data-schemas b/checkouts/data-schemas index 8232f178ae..1e17eb5472 160000 --- a/checkouts/data-schemas +++ b/checkouts/data-schemas @@ -1 +1 @@ -Subproject commit 8232f178ae709232907b783d709f5fba80b26201 +Subproject commit 1e17eb54727a77681a1b9e845c9a5d55b52d35a1 From 790ee6a819b1441b1273d962bf0cfa345f004a27 Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Wed, 27 Mar 2024 15:15:40 +0100 Subject: [PATCH 34/37] Explicit reexport of types (#2866) (#2913) Explicitly reexport types to make strict mypy setups happy. This backports #2866 to 1.x. Fixes GH-2910 Co-authored-by: Anton Pirker --- sentry_sdk/types.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sentry_sdk/types.py b/sentry_sdk/types.py index 5c46de7f88..f7397adee1 100644 --- a/sentry_sdk/types.py +++ b/sentry_sdk/types.py @@ -12,3 +12,5 @@ if TYPE_CHECKING: from sentry_sdk._types import Event, Hint # noqa: F401 + + __all__ = ["Event", "Hint"] From 6c2eb539f7b8ebb0f2fa9ed05cce4f862843eb9d Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Wed, 27 Mar 2024 15:38:10 +0100 Subject: [PATCH 35/37] ref: Define types at runtime (#2914) Set types in sentry_sdk.types to None at runtime. This allows the types to be imported from outside if TYPE_CHECKING guards. 
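For example, a minimal sketch of what this makes possible (the
`before_send` hook below is illustrative only and is not part of this
patch):

    from sentry_sdk.types import Event, Hint  # no longer raises at runtime

    def before_send(event: Event, hint: Hint) -> Event:
        # At runtime `Event` and `Hint` are None, so they can be used in
        # annotations but not, e.g., in isinstance() checks.
        return event
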
Fixes GH-2909 Co-authored-by: Anton Pirker Co-authored-by: anthony sottile <103459774+asottile-sentry@users.noreply.github.com> --- sentry_sdk/types.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/types.py b/sentry_sdk/types.py index f7397adee1..9a96ed489f 100644 --- a/sentry_sdk/types.py +++ b/sentry_sdk/types.py @@ -11,6 +11,11 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from sentry_sdk._types import Event, Hint # noqa: F401 + from sentry_sdk._types import Event, Hint +else: + # The lines below allow the types to be imported from outside `if TYPE_CHECKING` + # guards. The types in this module are only intended to be used for type hints. + Event = None + Hint = None - __all__ = ["Event", "Hint"] +__all__ = ("Event", "Hint") From a4e44fa6a2085a2fbccae46edcf6da67052cc6db Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Thu, 28 Mar 2024 10:04:38 +0000 Subject: [PATCH 36/37] release: 1.44.0 --- CHANGELOG.md | 9 +++++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86a849d203..a09fc4621e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 1.44.0 + +### Various fixes & improvements + +- ref: Define types at runtime (#2914) by @szokeasaurusrex +- Explicit reexport of types (#2866) (#2913) by @szokeasaurusrex +- build(deps): bump checkouts/data-schemas from `8232f17` to `1e17eb5` (#2901) by @dependabot +- feat(profiling): Add thread data to spans (#2843) by @Zylphrex + ## 1.43.0 ### Various fixes & improvements diff --git a/docs/conf.py b/docs/conf.py index 2cd901f5fa..3d55879336 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "1.43.0" +release = "1.44.0" version = ".".join(release.split(".")[:2]) # The short X.Y version. diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 0f3b5e9f94..ed296bd5ad 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -333,4 +333,4 @@ def _get_default_options(): del _get_default_options -VERSION = "1.43.0" +VERSION = "1.44.0" diff --git a/setup.py b/setup.py index 9f4155cad4..ff90fae92e 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_file_text(file_name): setup( name="sentry-sdk", - version="1.43.0", + version="1.44.0", author="Sentry Team and Contributors", author_email="hello@sentry.io", url="https://github.com/getsentry/sentry-python", From 4d8db7187cce5e7516228bec93e6e71811463230 Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Thu, 28 Mar 2024 11:06:54 +0100 Subject: [PATCH 37/37] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a09fc4621e..c4f5c78855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ - ref: Define types at runtime (#2914) by @szokeasaurusrex - Explicit reexport of types (#2866) (#2913) by @szokeasaurusrex -- build(deps): bump checkouts/data-schemas from `8232f17` to `1e17eb5` (#2901) by @dependabot - feat(profiling): Add thread data to spans (#2843) by @Zylphrex ## 1.43.0