diff --git a/.craft.yml b/.craft.yml index 21d4fc7496..70875d5404 100644 --- a/.craft.yml +++ b/.craft.yml @@ -8,7 +8,9 @@ targets: pypi:sentry-sdk: - name: github - name: aws-lambda-layer - includeNames: /^sentry-python-serverless-\d+(\.\d+)*\.zip$/ + # The regex that matches the version is taken from craft: + # https://github.com/getsentry/craft/blob/8d77c38ddbe4be59f98f61b6e42952ca087d3acd/src/utils/version.ts#L11 + includeNames: /^sentry-python-serverless-\bv?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(?:-?([\da-z-]+(?:\.[\da-z-]+)*))?(?:\+([\da-z-]+(?:\.[\da-z-]+)*))?\b.zip$/ layerName: SentryPythonServerlessSDK compatibleRuntimes: - name: python diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 31c0a616f3..f55ec12407 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,6 +9,9 @@ on: force: description: Force a release even when there are release-blockers (optional) required: false + merge_target: + description: Target branch to merge into. Uses the default branch as a fallback (optional) + required: false jobs: release: @@ -26,3 +29,4 @@ jobs: with: version: ${{ github.event.inputs.version }} force: ${{ github.event.inputs.force }} + merge_target: ${{ github.event.inputs.merge_target }} diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index ddac93d1e5..c40d45845d 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.5","3.7","3.8","3.11","3.12"] + python-version: ["3.5","3.7","3.8","3.9","3.11","3.12"] # python3.6 reached EOL and is no longer supported on # new versions of hosted runners on GitHub Actions # ubuntu-20.04 is the last version that supported python3.6 @@ -58,6 +58,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-huey-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq latest run: | set -x # print commands that are executed @@ -110,6 +114,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq pinned run: | set -x # print commands that are executed @@ -151,6 +159,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py2.7-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai py27 + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py2.7-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq py27 run: | set -x # print commands that are executed diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a57fb34b8..c4f5c78855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,130 @@ # Changelog +## 1.44.0 + +### Various fixes & improvements + +- ref: Define types at runtime (#2914) 
by @szokeasaurusrex +- Explicit reexport of types (#2866) (#2913) by @szokeasaurusrex +- feat(profiling): Add thread data to spans (#2843) by @Zylphrex + +## 1.43.0 + +### Various fixes & improvements + +- Add optional `keep_alive` (#2842) by @sentrivana + + If you're experiencing frequent network issues between the SDK and Sentry, + you can try turning on TCP keep-alive: + + ```python + import sentry_sdk + + sentry_sdk.init( + # ...your usual settings... + keep_alive=True, + ) + ``` + +- Add support for Celery Redbeat cron tasks (#2643) by @kwigley + + The SDK now supports the Redbeat scheduler in addition to the default + Celery Beat scheduler for auto-instrumenting crons. See + [the docs](https://docs.sentry.io/platforms/python/integrations/celery/crons/) + for more information about how to set this up (a setup sketch also follows + the linter-requirements hunk below). + +- `aws_event` can be an empty list (#2849) by @sentrivana +- Re-export `Event` in `types.py` (#2829) by @szokeasaurusrex +- Small API docs improvement (#2828) by @antonpirker +- Fixed OpenAI tests (#2834) by @antonpirker +- Bump `checkouts/data-schemas` from `ed078ed` to `8232f17` (#2832) by @dependabot + +## 1.42.0 + +### Various fixes & improvements + +- **New integration:** [OpenAI integration](https://docs.sentry.io/platforms/python/integrations/openai/) (#2791) by @colin-sentry + + We added an integration for OpenAI to capture errors and performance data when using the OpenAI Python SDK. + + Usage: + + This integration is auto-enabling, so if you have the `openai` package in your project, it will be enabled. Just initialize Sentry before you create your OpenAI client. + + ```python + from openai import OpenAI + + import sentry_sdk + + sentry_sdk.init( + dsn="___PUBLIC_DSN___", + enable_tracing=True, + traces_sample_rate=1.0, + ) + + client = OpenAI() + ``` + + For more information, see the documentation for the [OpenAI integration](https://docs.sentry.io/platforms/python/integrations/openai/). + +- Discard open OpenTelemetry spans after 10 minutes (#2801) by @antonpirker +- Propagate sentry-trace and baggage headers to Huey tasks (#2792) by @cnschn +- Added Event type (#2753) by @szokeasaurusrex +- Improve scrub_dict typing (#2768) by @szokeasaurusrex +- Dependencies: bump types-protobuf from 4.24.0.20240302 to 4.24.0.20240311 (#2797) by @dependabot + +## 1.41.0 + +### Various fixes & improvements + +- Add recursive scrubbing to `EventScrubber` (#2755) by @Cheapshot003 + + By default, the `EventScrubber` will not search your events for potential + PII recursively. With this release, you can enable this behavior with: + + ```python + import sentry_sdk + from sentry_sdk.scrubber import EventScrubber + + sentry_sdk.init( + # ...your usual settings... + event_scrubber=EventScrubber(recursive=True), + ) + ``` + +- Expose `socket_options` (#2786) by @sentrivana + + If the SDK is experiencing connection issues (connection resets, server + closing connection without response, etc.) while sending events to Sentry, + tweaking the default `urllib3` socket options to the following can help: + + ```python + import socket + from urllib3.connection import HTTPConnection + import sentry_sdk + + sentry_sdk.init( + # ...your usual settings... 
+ socket_options=HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + # note: skip the following line if you're on macOS, since TCP_KEEPIDLE doesn't exist there + (socket.SOL_TCP, socket.TCP_KEEPIDLE, 45), + (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10), + (socket.SOL_TCP, socket.TCP_KEEPCNT, 6), + ], + ) + ``` + +- Allow configuring the merge target for releases (#2777) by @sentrivana +- Allow empty characters in metric tag values (#2775) by @viglia +- Replace invalid tag values with an empty string instead of `_` (#2773) by @markushi +- Add documentation comment to `scrub_list` (#2769) by @szokeasaurusrex +- Fixed the regex that parses the version in the Lambda package file (#2767) by @antonpirker +- xfail broken AWS Lambda tests for now (#2794) by @sentrivana +- Removed print statements because they mess with the tests (#2789) by @antonpirker +- Bump `types-protobuf` from 4.24.0.20240129 to 4.24.0.20240302 (#2782) by @dependabot +- Bump `checkouts/data-schemas` from `eb941c2` to `ed078ed` (#2781) by @dependabot + ## 1.40.6 ### Various fixes & improvements diff --git a/Makefile b/Makefile index 32cdbb1fff..ac0ef51f5f 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ lint: .venv apidocs: .venv @$(VENV_PATH)/bin/pip install --editable . @$(VENV_PATH)/bin/pip install -U -r ./docs-requirements.txt + rm -rf docs/_build @$(VENV_PATH)/bin/sphinx-build -vv -W -b html docs/ docs/_build .PHONY: apidocs diff --git a/checkouts/data-schemas b/checkouts/data-schemas index eb941c2dcb..1e17eb5472 160000 --- a/checkouts/data-schemas +++ b/checkouts/data-schemas @@ -1 +1 @@ -Subproject commit eb941c2dcbcff9bc04f35ce7f1837de118f790fe +Subproject commit 1e17eb54727a77681a1b9e845c9a5d55b52d35a1 diff --git a/docs/conf.py b/docs/conf.py index 9a9f3fb56a..3d55879336 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "1.40.6" +release = "1.44.0" version = ".".join(release.split(".")[:2]) # The short X.Y version. diff --git a/linter-requirements.txt b/linter-requirements.txt index 5fec1f22c4..c390f5fe70 100644 --- a/linter-requirements.txt +++ b/linter-requirements.txt @@ -2,7 +2,7 @@ mypy black flake8==5.0.4 # flake8 depends on pyflakes>=3.0.0 and this dropped support for Python 2 "# type:" comments types-certifi -types-protobuf==4.24.0.20240129 # newer raises an error on mypy sentry_sdk +types-protobuf==4.24.0.20240311 # newer raises an error on mypy sentry_sdk types-redis types-setuptools pymongo # There is no separate types module. 
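The Redbeat changelog entry above (1.43.0) ships without a code sample, so here is a minimal, hedged sketch of the setup it describes. The `monitor_beat_tasks` flag is the existing `CeleryIntegration` option whose patching is extended in `sentry_sdk/integrations/celery.py` below; the app module, broker URL, task, and schedule are illustrative assumptions.

```python
# Sketch: auto-instrumented crons for beat tasks scheduled via RedBeat.
# Assumes a Redis broker on localhost and that this module is tasks.py.
import sentry_sdk
from sentry_sdk.integrations.celery import CeleryIntegration
from celery import Celery

sentry_sdk.init(
    dsn="___PUBLIC_DSN___",
    integrations=[CeleryIntegration(monitor_beat_tasks=True)],
)

app = Celery("tasks", broker="redis://localhost:6379/0")

@app.task
def nightly_cleanup():
    pass

# Illustrative schedule; RedBeat persists entries like this in Redis.
app.conf.beat_schedule = {
    "nightly-cleanup": {"task": "tasks.nightly_cleanup", "schedule": 300.0},
}

# Run beat with the RedBeat scheduler so the patched
# RedBeatScheduler.maybe_due() attaches the sentry-monitor-* headers:
#   celery -A tasks beat -S redbeat.RedBeatScheduler
```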
diff --git a/mypy.ini b/mypy.ini index fef90c867e..c1444d61e5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -67,6 +67,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-huey.*] ignore_missing_imports = True +[mypy-openai.*] +ignore_missing_imports = True [mypy-arq.*] ignore_missing_imports = True [mypy-grpc.*] diff --git a/scripts/split-tox-gh-actions/split-tox-gh-actions.py b/scripts/split-tox-gh-actions/split-tox-gh-actions.py index f8beffc219..13b81283ca 100755 --- a/scripts/split-tox-gh-actions/split-tox-gh-actions.py +++ b/scripts/split-tox-gh-actions/split-tox-gh-actions.py @@ -70,6 +70,7 @@ "beam", "celery", "huey", + "openai", "rq", ], "Databases": [ diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 2536541072..49bffb3416 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -9,6 +9,10 @@ if TYPE_CHECKING: + from collections.abc import MutableMapping + + from datetime import datetime + from types import TracebackType from typing import Any from typing import Callable @@ -19,13 +23,69 @@ from typing import Tuple from typing import Type from typing import Union - from typing_extensions import Literal + from typing_extensions import Literal, TypedDict + + # "critical" is an alias of "fatal" recognized by Relay + LogLevelStr = Literal["fatal", "critical", "error", "warning", "info", "debug"] + + Event = TypedDict( + "Event", + { + "breadcrumbs": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "check_in_id": str, + "contexts": dict[str, dict[str, object]], + "dist": str, + "duration": Optional[float], + "environment": str, + "errors": list[dict[str, Any]], # TODO: We can expand on this type + "event_id": str, + "exception": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "extra": MutableMapping[str, object], + "fingerprint": list[str], + "level": LogLevelStr, + "logentry": Mapping[str, object], + "logger": str, + "measurements": dict[str, object], + "message": str, + "modules": dict[str, str], + "monitor_config": Mapping[str, object], + "monitor_slug": Optional[str], + "platform": Literal["python"], + "profile": object, # Should be sentry_sdk.profiler.Profile, but we can't import that here due to circular imports + "release": str, + "request": dict[str, object], + "sdk": Mapping[str, object], + "server_name": str, + "spans": list[dict[str, object]], + "stacktrace": dict[ + str, object + ], # We access this key in the code, but I am unsure whether we ever set it + "start_timestamp": datetime, + "status": Optional[str], + "tags": MutableMapping[ + str, str + ], # Tags must be less than 200 characters each + "threads": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "timestamp": Optional[datetime], # Must be set before sending the event + "transaction": str, + "transaction_info": Mapping[str, Any], # TODO: We can expand on this type + "type": Literal["check_in", "transaction"], + "user": dict[str, object], + "_metrics_summary": dict[str, object], + }, + total=False, + ) ExcInfo = Tuple[ Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType] ] - Event = Dict[str, Any] Hint = Dict[str, Any] Breadcrumb = Dict[str, Any] diff --git a/sentry_sdk/api.py b/sentry_sdk/api.py index 1b56571bfa..3148c43f1a 100644 --- a/sentry_sdk/api.py +++ b/sentry_sdk/api.py @@ -22,6 +22,7 @@ BreadcrumbHint, ExcInfo, MeasurementUnit, + LogLevelStr, ) from sentry_sdk.tracing import Span @@ -91,7 +92,7 @@ def capture_event( 
@hubmethod def capture_message( message, # type: str - level=None, # type: Optional[str] + level=None, # type: Optional[LogLevelStr] scope=None, # type: Optional[Any] **scope_kwargs # type: Any ): @@ -189,7 +190,7 @@ def set_user(value): @scopemethod def set_level(value): - # type: (str) -> None + # type: (LogLevelStr) -> None return Hub.current.scope.set_level(value) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 270d814bfe..296de71804 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1,3 +1,8 @@ +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping # type: ignore[attr-defined] + from importlib import import_module import os import uuid @@ -38,7 +43,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope -from sentry_sdk.profiler import has_profiling_enabled, setup_profiler +from sentry_sdk.profiler import has_profiling_enabled, Profile, setup_profiler from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor from sentry_sdk.spotlight import setup_spotlight @@ -148,6 +153,12 @@ def _get_options(*args, **kwargs): if rv["event_scrubber"] is None: rv["event_scrubber"] = EventScrubber() + if rv["socket_options"] and not isinstance(rv["socket_options"], list): + logger.warning( + "Ignoring socket_options because of unexpected format. See urllib3.HTTPConnection.socket_options for the expected format." + ) + rv["socket_options"] = None + return rv @@ -387,7 +398,7 @@ def _prepare_event( for key in "release", "environment", "server_name", "dist": if event.get(key) is None and self.options[key] is not None: - event[key] = text_type(self.options[key]).strip() + event[key] = text_type(self.options[key]).strip() # type: ignore[literal-required] if event.get("sdk") is None: sdk_info = dict(SDK_INFO) sdk_info["integrations"] = sorted(self.integrations.keys()) @@ -561,7 +572,7 @@ def _update_session_from_event( errored = True for error in exceptions: mechanism = error.get("mechanism") - if mechanism and mechanism.get("handled") is False: + if isinstance(mechanism, Mapping) and mechanism.get("handled") is False: crashed = True break @@ -653,7 +664,7 @@ def capture_event( headers = { "event_id": event_opt["event_id"], "sent_at": format_timestamp(datetime_utcnow()), - } + } # type: dict[str, object] if dynamic_sampling_context: headers["trace"] = dynamic_sampling_context @@ -661,7 +672,7 @@ def capture_event( envelope = Envelope(headers=headers) if is_transaction: - if profile is not None: + if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) envelope.add_transaction(event_opt) elif is_checkin: diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index fe9736938c..ed296bd5ad 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -14,6 +14,7 @@ from typing import Dict from typing import Any from typing import Sequence + from typing import Tuple from typing_extensions import TypedDict from sentry_sdk.integrations import Integration @@ -190,6 +191,18 @@ class SPANDATA: Example: "http.handler" """ + THREAD_ID = "thread.id" + """ + Identifier of a thread from where the span originated. This should be a string. + Example: "7972576320" + """ + + THREAD_NAME = "thread.name" + """ + Label identifying a thread from where the span originated. This should be a string. 
+ Example: "MainThread" + """ + class OP: CACHE_GET_ITEM = "cache.get_item" @@ -218,6 +231,8 @@ class OP: MIDDLEWARE_STARLITE = "middleware.starlite" MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive" MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" + OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" + OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" QUEUE_SUBMIT_CELERY = "queue.submit.celery" @@ -260,6 +275,8 @@ def __init__( https_proxy=None, # type: Optional[str] ignore_errors=[], # type: Sequence[Union[type, str]] # noqa: B006 max_request_body_size="medium", # type: str + socket_options=None, # type: Optional[List[Tuple[int, int, int | bytes]]] + keep_alive=False, # type: bool before_send=None, # type: Optional[EventProcessor] before_breadcrumb=None, # type: Optional[BreadcrumbProcessor] debug=None, # type: Optional[bool] @@ -316,4 +333,4 @@ def _get_default_options(): del _get_default_options -VERSION = "1.40.6" +VERSION = "1.44.0" diff --git a/sentry_sdk/crons/api.py b/sentry_sdk/crons/api.py index cd240a7dcd..92d113a924 100644 --- a/sentry_sdk/crons/api.py +++ b/sentry_sdk/crons/api.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Optional + from sentry_sdk._types import Event def _create_check_in_event( @@ -15,7 +16,7 @@ def _create_check_in_event( duration_s=None, monitor_config=None, ): - # type: (Optional[str], Optional[str], Optional[str], Optional[float], Optional[Dict[str, Any]]) -> Dict[str, Any] + # type: (Optional[str], Optional[str], Optional[str], Optional[float], Optional[Dict[str, Any]]) -> Event options = Hub.current.client.options if Hub.current.client else {} check_in_id = check_in_id or uuid.uuid4().hex # type: str @@ -27,7 +28,7 @@ def _create_check_in_event( "duration": duration_s, "environment": options.get("environment", None), "release": options.get("release", None), - } + } # type: Event if monitor_config: check_in["monitor_config"] = monitor_config diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index c339528821..a716d33433 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -40,6 +40,7 @@ Breadcrumb, BreadcrumbHint, ExcInfo, + LogLevelStr, ) from sentry_sdk.consts import ClientConstructor @@ -335,7 +336,7 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs): return last_event_id def capture_message(self, message, level=None, scope=None, **scope_kwargs): - # type: (str, Optional[str], Optional[Scope], Any) -> Optional[str] + # type: (str, Optional[LogLevelStr], Optional[Scope], Any) -> Optional[str] """ Captures a message. 
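The `LogLevelStr` annotations added to `sentry_sdk/api.py` and `sentry_sdk/hub.py` above only narrow the static type of `level`; a short sketch of what that buys in practice (the invalid level in the last lines is an illustrative assumption, not SDK behavior):

```python
# Sketch: `level` is now Literal["fatal", "critical", "error", "warning",
# "info", "debug"] rather than an arbitrary str.
import sentry_sdk

sentry_sdk.init(dsn="___PUBLIC_DSN___")

sentry_sdk.set_level("warning")  # ok: a valid LogLevelStr literal
sentry_sdk.capture_message("disk nearly full", level="error")  # ok

# sentry_sdk.capture_message("oops", level="verbose")
# ^ a type checker rejects "verbose" at lint time; runtime is unchanged.
```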
diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 21f7188ff1..c9737ae589 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "sentry_sdk.integrations.fastapi.FastApiIntegration", "sentry_sdk.integrations.flask.FlaskIntegration", "sentry_sdk.integrations.httpx.HttpxIntegration", + "sentry_sdk.integrations.openai.OpenAIIntegration", "sentry_sdk.integrations.pyramid.PyramidIntegration", "sentry_sdk.integrations.redis.RedisIntegration", "sentry_sdk.integrations.rq.RqIntegration", diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 5a41654498..b72ebde126 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -22,6 +22,7 @@ from typing import Dict from typing import Optional from typing import Union + from sentry_sdk._types import Event SENSITIVE_ENV_KEYS = ( @@ -59,7 +60,7 @@ def __init__(self, request): self.request = request def extract_into_event(self, event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None client = Hub.current.client if client is None: return diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index e51bdeeac3..19974030ed 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -48,13 +48,12 @@ from aiohttp import TraceRequestStartParams, TraceRequestEndParams from types import SimpleNamespace from typing import Any - from typing import Dict from typing import Optional from typing import Tuple from typing import Union from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor TRANSACTION_STYLE_VALUES = ("handler_name", "method_and_path_pattern") @@ -256,10 +255,10 @@ async def on_request_end(session, trace_config_ctx, params): def _make_request_processor(weak_request): # type: (weakref.ReferenceType[Request]) -> EventProcessor def aiohttp_processor( - event, # type: Dict[str, Any] - hint, # type: Dict[str, Tuple[type, BaseException, Any]] + event, # type: Event + hint, # type: dict[str, Tuple[type, BaseException, Any]] ): - # type: (...) -> Dict[str, Any] + # type: (...) 
-> Event request = weak_request() if request is None: return event diff --git a/sentry_sdk/integrations/ariadne.py b/sentry_sdk/integrations/ariadne.py index 86d6b5e28e..5b98a88443 100644 --- a/sentry_sdk/integrations/ariadne.py +++ b/sentry_sdk/integrations/ariadne.py @@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional from ariadne.types import GraphQLError, GraphQLResult, GraphQLSchema, QueryParser # type: ignore from graphql.language.ast import DocumentNode # type: ignore - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor class AriadneIntegration(Integration): @@ -131,7 +131,7 @@ def _make_request_event_processor(data): """Add request data and api_target to events.""" def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event if not isinstance(data, dict): return event @@ -163,7 +163,7 @@ def _make_response_event_processor(response): """Add response data to the event's response context.""" def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii() and response.get("errors"): contexts = event.setdefault("contexts", {}) diff --git a/sentry_sdk/integrations/aws_lambda.py b/sentry_sdk/integrations/aws_lambda.py index 00752e7487..3cefc90cfb 100644 --- a/sentry_sdk/integrations/aws_lambda.py +++ b/sentry_sdk/integrations/aws_lambda.py @@ -81,7 +81,7 @@ def sentry_handler(aws_event, aws_context, *args, **kwargs): # will be the same for all events in the list, since they're all hitting # the lambda in the same request.) - if isinstance(aws_event, list): + if isinstance(aws_event, list) and len(aws_event) >= 1: request_data = aws_event[0] batch_size = len(aws_event) else: diff --git a/sentry_sdk/integrations/bottle.py b/sentry_sdk/integrations/bottle.py index cc6360daa3..6f3678466e 100644 --- a/sentry_sdk/integrations/bottle.py +++ b/sentry_sdk/integrations/bottle.py @@ -200,7 +200,7 @@ def _make_request_event_processor(app, request, integration): # type: (Bottle, LocalRequest, BottleIntegration) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event _set_transaction_name_and_source(event, integration.transaction_style, request) with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/celery.py b/sentry_sdk/integrations/celery.py index 0fd983de8d..f2e1aff48a 100644 --- a/sentry_sdk/integrations/celery.py +++ b/sentry_sdk/integrations/celery.py @@ -56,6 +56,11 @@ except ImportError: raise DidNotEnable("Celery not installed") +try: + from redbeat.schedulers import RedBeatScheduler # type: ignore +except ImportError: + RedBeatScheduler = None + CELERY_CONTROL_FLOW_EXCEPTIONS = (Retry, Ignore, Reject) @@ -76,6 +81,7 @@ def __init__( if monitor_beat_tasks: _patch_beat_apply_entry() + _patch_redbeat_maybe_due() _setup_celery_beat_signals() @staticmethod @@ -535,6 +541,62 @@ def sentry_apply_entry(*args, **kwargs): Scheduler.apply_entry = sentry_apply_entry +def _patch_redbeat_maybe_due(): + # type: () -> None + + if RedBeatScheduler is None: + return + + original_maybe_due = RedBeatScheduler.maybe_due + + def sentry_maybe_due(*args, **kwargs): + # type: (*Any, **Any) -> None + scheduler, schedule_entry = args + app = scheduler.app + + celery_schedule = schedule_entry.schedule + monitor_name = schedule_entry.name + + hub = 
Hub.current + integration = hub.get_integration(CeleryIntegration) + if integration is None: + return original_maybe_due(*args, **kwargs) + + if match_regex_list(monitor_name, integration.exclude_beat_tasks): + return original_maybe_due(*args, **kwargs) + + with hub.configure_scope() as scope: + # When tasks are started from Celery Beat, make sure each task has its own trace. + scope.set_new_propagation_context() + + monitor_config = _get_monitor_config(celery_schedule, app, monitor_name) + + is_supported_schedule = bool(monitor_config) + if is_supported_schedule: + headers = schedule_entry.options.pop("headers", {}) + headers.update( + { + "sentry-monitor-slug": monitor_name, + "sentry-monitor-config": monitor_config, + } + ) + + check_in_id = capture_checkin( + monitor_slug=monitor_name, + monitor_config=monitor_config, + status=MonitorStatus.IN_PROGRESS, + ) + headers.update({"sentry-monitor-check-in-id": check_in_id}) + + # Set the Sentry configuration in the options of the ScheduleEntry. + # Those will be picked up in `apply_async` and added to the headers. + schedule_entry.options["headers"] = headers + + return original_maybe_due(*args, **kwargs) + + RedBeatScheduler.maybe_due = sentry_maybe_due + + def _setup_celery_beat_signals(): # type: () -> None task_success.connect(crons_task_success) diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index 426565e645..98834a4693 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -472,7 +472,7 @@ def sentry_patched_get_response(self, request): def _make_wsgi_request_event_processor(weak_request, integration): # type: (Callable[[], WSGIRequest], DjangoIntegration) -> EventProcessor def wsgi_request_event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. @@ -570,7 +570,7 @@ def parsed_body(self): def _set_user_info(request, event): - # type: (WSGIRequest, Dict[str, Any]) -> None + # type: (WSGIRequest, Event) -> None user_info = event.setdefault("user", {}) user = getattr(request, "user", None) diff --git a/sentry_sdk/integrations/django/asgi.py b/sentry_sdk/integrations/django/asgi.py index 18f6a58811..e1ba678011 100644 --- a/sentry_sdk/integrations/django/asgi.py +++ b/sentry_sdk/integrations/django/asgi.py @@ -26,13 +26,13 @@ from django.core.handlers.asgi import ASGIRequest from django.http.response import HttpResponse - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor def _make_asgi_request_event_processor(request): # type: (ASGIRequest) -> EventProcessor def asgi_request_event_processor(event, hint): - # type: (dict[str, Any], dict[str, Any]) -> dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. 
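Throughout the hunks above and below, event processors are retyped from `Dict[str, Any]` to the `Event` TypedDict introduced in `sentry_sdk/_types.py`. A hedged sketch of a user-defined hook adopting the same annotation style (the hook body and tag name are illustrative assumptions):

```python
# Sketch: a custom before_send hook annotated with the new Event TypedDict
# instead of Dict[str, Any], using the SDK's two-step TYPE_CHECKING import.
from typing import Optional

import sentry_sdk
from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
    from sentry_sdk._types import Event, Hint


def before_send(event, hint):
    # type: (Event, Hint) -> Optional[Event]
    # "tags" is MutableMapping[str, str] in the Event TypedDict, so type
    # checkers can now verify both key and value types in hooks like this.
    event.setdefault("tags", {})["service"] = "worker"  # illustrative tag
    return event


sentry_sdk.init(dsn="___PUBLIC_DSN___", before_send=before_send)
```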
diff --git a/sentry_sdk/integrations/falcon.py b/sentry_sdk/integrations/falcon.py index 3fab11cfeb..d5e2480485 100644 --- a/sentry_sdk/integrations/falcon.py +++ b/sentry_sdk/integrations/falcon.py @@ -18,7 +18,7 @@ from typing import Dict from typing import Optional - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor # In Falcon 3.0 `falcon.api_helpers` is renamed to `falcon.app_helpers` # and `falcon.API` to `falcon.App` @@ -258,7 +258,7 @@ def _has_http_5xx_status(response): def _set_transaction_name_and_source(event, transaction_style, request): - # type: (Dict[str, Any], str, falcon.Request) -> None + # type: (Event, str, falcon.Request) -> None name_for_style = { "uri_template": request.uri_template, "path": request.path, @@ -271,7 +271,7 @@ def _make_request_event_processor(req, integration): # type: (falcon.Request, FalconIntegration) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event _set_transaction_name_and_source(event, integration.transaction_style, req) with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/fastapi.py b/sentry_sdk/integrations/fastapi.py index 6fbe53b92b..33a5591cc4 100644 --- a/sentry_sdk/integrations/fastapi.py +++ b/sentry_sdk/integrations/fastapi.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from typing import Any, Callable, Dict from sentry_sdk.scope import Scope + from sentry_sdk._types import Event try: from sentry_sdk.integrations.starlette import ( @@ -111,9 +112,9 @@ async def _sentry_app(*args, **kwargs): info = await extractor.extract_request_info() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, Dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event # Extract information from request request_info = event.get("request", {}) diff --git a/sentry_sdk/integrations/flask.py b/sentry_sdk/integrations/flask.py index 453ab48ce3..f0bc3d7750 100644 --- a/sentry_sdk/integrations/flask.py +++ b/sentry_sdk/integrations/flask.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from typing import Any, Callable, Dict, Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor from sentry_sdk.integrations.wsgi import _ScopedResponse from werkzeug.datastructures import FileStorage, ImmutableMultiDict @@ -172,7 +172,7 @@ def _make_request_event_processor(app, request, integration): # type: (Flask, Callable[[], Request], FlaskIntegration) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. 
This might happen if the processor is pushed away to @@ -211,7 +211,7 @@ def _capture_exception(sender, exception, **kwargs): def _add_user_to_event(event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None if flask_login is None: return diff --git a/sentry_sdk/integrations/gnu_backtrace.py b/sentry_sdk/integrations/gnu_backtrace.py index ad9c437878..f8321a6cd7 100644 --- a/sentry_sdk/integrations/gnu_backtrace.py +++ b/sentry_sdk/integrations/gnu_backtrace.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict + from sentry_sdk._types import Event MODULE_RE = r"[a-zA-Z0-9/._:\\-]+" @@ -42,13 +42,13 @@ def setup_once(): # type: () -> None @add_global_event_processor def process_gnu_backtrace(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): return _process_gnu_backtrace(event, hint) def _process_gnu_backtrace(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event if Hub.current.get_integration(GnuBacktraceIntegration) is None: return event diff --git a/sentry_sdk/integrations/gql.py b/sentry_sdk/integrations/gql.py index 79fc8d022f..9db6632a4a 100644 --- a/sentry_sdk/integrations/gql.py +++ b/sentry_sdk/integrations/gql.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Tuple, Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor EventDataType = Dict[str, Union[str, Tuple[VariableDefinitionNode, ...]]] @@ -112,7 +112,7 @@ def sentry_patched_execute(self, document, *args, **kwargs): def _make_gql_event_processor(client, document): # type: (gql.Client, DocumentNode) -> EventProcessor def processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event try: errors = hint["exc_info"][1].errors except (AttributeError, KeyError): diff --git a/sentry_sdk/integrations/graphene.py b/sentry_sdk/integrations/graphene.py index fa753d0812..b9c3b26018 100644 --- a/sentry_sdk/integrations/graphene.py +++ b/sentry_sdk/integrations/graphene.py @@ -19,6 +19,7 @@ from graphene.language.source import Source # type: ignore from graphql.execution import ExecutionResult # type: ignore from graphql.type import GraphQLSchema # type: ignore + from sentry_sdk._types import Event class GrapheneIntegration(Integration): @@ -100,7 +101,7 @@ async def _sentry_patched_graphql_async(schema, source, *args, **kwargs): def _event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event if _should_send_default_pii(): request_info = event.setdefault("request", {}) request_info["api_target"] = "graphql" diff --git a/sentry_sdk/integrations/huey.py b/sentry_sdk/integrations/huey.py index 9641160099..43c03936b1 100644 --- a/sentry_sdk/integrations/huey.py +++ b/sentry_sdk/integrations/huey.py @@ -6,10 +6,15 @@ from sentry_sdk._compat import reraise from sentry_sdk._types import TYPE_CHECKING from sentry_sdk import Hub +from sentry_sdk.api import continue_trace, get_baggage, get_traceparent from sentry_sdk.consts import OP from sentry_sdk.hub import _should_send_default_pii from sentry_sdk.integrations import DidNotEnable, Integration -from sentry_sdk.tracing import Transaction, TRANSACTION_SOURCE_TASK +from sentry_sdk.tracing import ( + BAGGAGE_HEADER_NAME, + SENTRY_TRACE_HEADER_NAME, + TRANSACTION_SOURCE_TASK, +) from 
sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -25,7 +30,7 @@ F = TypeVar("F", bound=Callable[..., Any]) try: - from huey.api import Huey, Result, ResultGroup, Task + from huey.api import Huey, Result, ResultGroup, Task, PeriodicTask from huey.exceptions import CancelExecution, RetryTask, TaskLockedException except ImportError: raise DidNotEnable("Huey is not installed") @@ -56,6 +61,14 @@ def _sentry_enqueue(self, task): return old_enqueue(self, task) with hub.start_span(op=OP.QUEUE_SUBMIT_HUEY, description=task.name): + if not isinstance(task, PeriodicTask): + # Attach trace propagation data to task kwargs. We do + # not do this for periodic tasks, as these don't + # really have an originating transaction. + task.kwargs["sentry_headers"] = { + BAGGAGE_HEADER_NAME: get_baggage(), + SENTRY_TRACE_HEADER_NAME: get_traceparent(), + } return old_enqueue(self, task) Huey.enqueue = _sentry_enqueue @@ -145,12 +158,15 @@ def _sentry_execute(self, task, timestamp=None): scope.clear_breadcrumbs() scope.add_event_processor(_make_event_processor(task)) - transaction = Transaction( + sentry_headers = task.kwargs.pop("sentry_headers", None) + + transaction = continue_trace( + sentry_headers or {}, name=task.name, - status="ok", op=OP.QUEUE_TASK_HUEY, source=TRANSACTION_SOURCE_TASK, ) + transaction.set_status("ok") if not getattr(task, "_sentry_is_patched", False): task.execute = _wrap_task_execute(task.execute) diff --git a/sentry_sdk/integrations/logging.py b/sentry_sdk/integrations/logging.py index ee6bb8e1d1..d455983fc5 100644 --- a/sentry_sdk/integrations/logging.py +++ b/sentry_sdk/integrations/logging.py @@ -16,6 +16,7 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import MutableMapping from logging import LogRecord from typing import Any from typing import Dict @@ -156,7 +157,7 @@ def _logging_to_event_level(self, record): ) def _extra_from_record(self, record): - # type: (LogRecord) -> Dict[str, None] + # type: (LogRecord) -> MutableMapping[str, object] return { k: v for k, v in iteritems(vars(record)) @@ -225,7 +226,9 @@ def _emit(self, record): hint["log_record"] = record - event["level"] = self._logging_to_event_level(record) + level = self._logging_to_event_level(record) + if level in {"debug", "info", "warning", "error", "critical", "fatal"}: + event["level"] = level # type: ignore[typeddict-item] event["logger"] = record.name # Log records from `warnings` module as separate issues diff --git a/sentry_sdk/integrations/modules.py b/sentry_sdk/integrations/modules.py index 5b595b4032..fa0fbf8936 100644 --- a/sentry_sdk/integrations/modules.py +++ b/sentry_sdk/integrations/modules.py @@ -9,8 +9,6 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict - from sentry_sdk._types import Event @@ -22,7 +20,7 @@ def setup_once(): # type: () -> None @add_global_event_processor def processor(event, hint): - # type: (Event, Any) -> Dict[str, Any] + # type: (Event, Any) -> Event if event.get("type") == "transaction": return event diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py new file mode 100644 index 0000000000..0e71029b60 --- /dev/null +++ b/sentry_sdk/integrations/openai.py @@ -0,0 +1,304 @@ +from sentry_sdk import consts +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Iterable, List, Optional, Callable, Iterator + from sentry_sdk.tracing import Span + +import sentry_sdk +from sentry_sdk._functools import wraps +from sentry_sdk.hub 
import Hub, _should_send_default_pii +from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception + +try: + from openai.resources.chat.completions import Completions + from openai.resources import Embeddings + + if TYPE_CHECKING: + from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk +except ImportError: + raise DidNotEnable("OpenAI not installed") + +try: + import tiktoken # type: ignore + + enc = tiktoken.get_encoding("cl100k_base") + + def count_tokens(s): + # type: (str) -> int + return len(enc.encode_ordinary(s)) + + logger.debug("[OpenAI] using tiktoken to count tokens") +except ImportError: + logger.info( + "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs. " + "Please install 'tiktoken' if you aren't receiving token usage in Sentry. " + "See https://docs.sentry.io/platforms/python/integrations/openai/ for more information." + ) + + def count_tokens(s): + # type: (str) -> int + return 0 + + +COMPLETION_TOKENS_USED = "ai.completion_tokens.used" +PROMPT_TOKENS_USED = "ai.prompt_tokens.used" +TOTAL_TOKENS_USED = "ai.total_tokens.used" + + +class OpenAIIntegration(Integration): + identifier = "openai" + + def __init__(self, include_prompts=True): + # type: (OpenAIIntegration, bool) -> None + self.include_prompts = include_prompts + + @staticmethod + def setup_once(): + # type: () -> None + Completions.create = _wrap_chat_completion_create(Completions.create) + Embeddings.create = _wrap_embeddings_create(Embeddings.create) + + +def _capture_exception(hub, exc): + # type: (Hub, Any) -> None + + if hub.client is not None: + event, hint = event_from_exception( + exc, + client_options=hub.client.options, + mechanism={"type": "openai", "handled": False}, + ) + hub.capture_event(event, hint=hint) + + +def _normalize_data(data): + # type: (Any) -> Any + + # convert pydantic data (e.g. 
OpenAI v1+) to json compatible format + if hasattr(data, "model_dump"): + try: + return data.model_dump() + except Exception as e: + logger.warning("Could not convert pydantic data to JSON: %s", e) + return data + if isinstance(data, list): + return list(_normalize_data(x) for x in data) + if isinstance(data, dict): + return {k: _normalize_data(v) for (k, v) in data.items()} + return data + + +def set_data_normalized(span, key, value): + # type: (Span, str, Any) -> None + span.set_data(key, _normalize_data(value)) + + +def _calculate_chat_completion_usage( + messages, response, span, streaming_message_responses=None +): + # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]]) -> None + completion_tokens = 0 + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "completion_tokens") and isinstance( + response.usage.completion_tokens, int + ): + completion_tokens = response.usage.completion_tokens + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + for message in messages: + if "content" in message: + prompt_tokens += count_tokens(message["content"]) + + if completion_tokens == 0: + if streaming_message_responses is not None: + for message in streaming_message_responses: + completion_tokens += count_tokens(message) + elif hasattr(response, "choices"): + for choice in response.choices: + if hasattr(choice, "message"): + completion_tokens += count_tokens(choice.message) + + if total_tokens == 0: + total_tokens = prompt_tokens + completion_tokens + + if completion_tokens != 0: + set_data_normalized(span, COMPLETION_TOKENS_USED, completion_tokens) + if prompt_tokens != 0: + set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) + if total_tokens != 0: + set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) + + +def _wrap_chat_completion_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + @wraps(f) + def new_chat_completion(*args, **kwargs): + # type: (*Any, **Any) -> Any + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + kwargs["messages"] = list(kwargs["messages"]) + messages = kwargs["messages"] + model = kwargs.get("model") + streaming = kwargs.get("stream") + + span = sentry_sdk.start_span( + op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion" + ) + span.__enter__() + try: + res = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + span.__exit__(None, None, None) + raise e from None + + with capture_internal_exceptions(): + if _should_send_default_pii() and integration.include_prompts: + set_data_normalized(span, "ai.input_messages", messages) + + set_data_normalized(span, "ai.model_id", model) + set_data_normalized(span, "ai.streaming", streaming) + + if hasattr(res, "choices"): + if _should_send_default_pii() and integration.include_prompts: + 
set_data_normalized( + span, + "ai.responses", + list(map(lambda x: x.message, res.choices)), + ) + _calculate_chat_completion_usage(messages, res, span) + span.__exit__(None, None, None) + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator = res._iterator # type: Iterator[ChatCompletionChunk] + + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list( + map(lambda chunk: "".join(chunk), data_buf) + ) + if ( + _should_send_default_pii() + and integration.include_prompts + ): + set_data_normalized(span, "ai.responses", all_responses) + _calculate_chat_completion_usage( + messages, res, span, all_responses + ) + span.__exit__(None, None, None) + + res._iterator = new_iterator() + else: + set_data_normalized(span, "unknown_response", True) + span.__exit__(None, None, None) + return res + + return new_chat_completion + + +def _wrap_embeddings_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + + @wraps(f) + def new_embeddings_create(*args, **kwargs): + # type: (*Any, **Any) -> Any + + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + with sentry_sdk.start_span( + op=consts.OP.OPENAI_EMBEDDINGS_CREATE, + description="OpenAI Embedding Creation", + ) as span: + if "input" in kwargs and ( + _should_send_default_pii() and integration.include_prompts + ): + if isinstance(kwargs["input"], str): + set_data_normalized(span, "ai.input_messages", [kwargs["input"]]) + elif ( + isinstance(kwargs["input"], list) + and len(kwargs["input"]) > 0 + and isinstance(kwargs["input"][0], str) + ): + set_data_normalized(span, "ai.input_messages", kwargs["input"]) + if "model" in kwargs: + set_data_normalized(span, "ai.model_id", kwargs["model"]) + try: + response = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + raise e from None + + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + prompt_tokens = count_tokens(kwargs["input"] or "") + + if total_tokens == 0: + total_tokens = prompt_tokens + + set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) + + return response + + return new_embeddings_create diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 0ed4e7f709..0db698e239 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -1,3 +1,5 @@ +from time import time + from opentelemetry.context import get_value # type: ignore from opentelemetry.sdk.trace import SpanProcessor # type: 
ignore from opentelemetry.semconv.trace import SpanAttributes # type: ignore @@ -33,6 +35,7 @@ from sentry_sdk._types import Event, Hint OPEN_TELEMETRY_CONTEXT = "otel" +SPAN_MAX_TIME_OPEN_MINUTES = 10 def link_trace_context_to_error_event(event, otel_span_map): @@ -76,6 +79,9 @@ class SentrySpanProcessor(SpanProcessor): # type: ignore # The mapping from otel span ids to sentry spans otel_span_map = {} # type: Dict[str, Union[Transaction, SentrySpan]] + # The currently open spans. Elements will be discarded after SPAN_MAX_TIME_OPEN_MINUTES + open_spans = {} # type: dict[int, set[str]] + def __new__(cls): # type: () -> SentrySpanProcessor if not hasattr(cls, "instance"): @@ -90,6 +96,24 @@ def global_event_processor(event, hint): # type: (Event, Hint) -> Event return link_trace_context_to_error_event(event, self.otel_span_map) + def _prune_old_spans(self): + # type: (SentrySpanProcessor) -> None + """ + Prune spans that have been open for too long. + """ + current_time_minutes = int(time() / 60) + for span_start_minutes in list( + self.open_spans.keys() + ): # making a list because we change the dict + # prune empty open spans buckets + if self.open_spans[span_start_minutes] == set(): + self.open_spans.pop(span_start_minutes) + + # prune old buckets + elif current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES: + for span_id in self.open_spans.pop(span_start_minutes): + self.otel_span_map.pop(span_id, None) + def on_start(self, otel_span, parent_context=None): # type: (OTelSpan, Optional[SpanContext]) -> None hub = Hub.current @@ -125,7 +149,9 @@ def on_start(self, otel_span, parent_context=None): sentry_span = sentry_parent_span.start_child( span_id=trace_data["span_id"], description=otel_span.name, - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) else: @@ -135,12 +161,22 @@ def on_start(self, otel_span, parent_context=None): parent_span_id=parent_span_id, trace_id=trace_data["trace_id"], baggage=trace_data["baggage"], - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) self.otel_span_map[trace_data["span_id"]] = sentry_span + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision + self.open_spans.setdefault(span_start_in_minutes, set()).add( + trace_data["span_id"] + ) + self._prune_old_spans() + def on_end(self, otel_span): # type: (OTelSpan) -> None hub = Hub.current @@ -173,7 +209,15 @@ def on_end(self, otel_span): else: self._update_span_with_otel_data(sentry_span, otel_span) - sentry_span.finish(end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9)) + sentry_span.finish( + end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9) + ) # OTel spans have nanosecond precision + + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision + self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id) + self._prune_old_spans() def _is_sentry_span(self, hub, otel_span): # type: (Hub, OTelSpan) -> bool diff --git a/sentry_sdk/integrations/pyramid.py b/sentry_sdk/integrations/pyramid.py index 80750f0268..3b9b2fdb96 100644 --- a/sentry_sdk/integrations/pyramid.py +++ b/sentry_sdk/integrations/pyramid.py @@ -36,7 +36,7 @@ from webob.compat import 
cgi_FieldStorage # type: ignore from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor if getattr(Request, "authenticated_userid", None): @@ -216,7 +216,7 @@ def size_of_file(self, postdata): def _make_event_processor(weak_request, integration): # type: (Callable[[], Request], PyramidIntegration) -> EventProcessor def pyramid_event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event request = weak_request() if request is None: return event diff --git a/sentry_sdk/integrations/quart.py b/sentry_sdk/integrations/quart.py index 4dee751d65..8803fa7cea 100644 --- a/sentry_sdk/integrations/quart.py +++ b/sentry_sdk/integrations/quart.py @@ -20,10 +20,9 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict from typing import Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor try: import quart_auth # type: ignore @@ -186,7 +185,7 @@ async def _request_websocket_started(app, **kwargs): def _make_request_event_processor(app, request, integration): # type: (Quart, Request, QuartIntegration) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. @@ -231,7 +230,7 @@ async def _capture_exception(sender, exception, **kwargs): def _add_user_to_event(event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None if quart_auth is None: return diff --git a/sentry_sdk/integrations/rq.py b/sentry_sdk/integrations/rq.py index b5eeb0be85..2b32e59880 100644 --- a/sentry_sdk/integrations/rq.py +++ b/sentry_sdk/integrations/rq.py @@ -27,9 +27,9 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, Dict + from typing import Any, Callable - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor from sentry_sdk.utils import ExcInfo from rq.job import Job @@ -126,12 +126,12 @@ def sentry_patched_enqueue_job(self, job, **kwargs): def _make_event_processor(weak_job): # type: (Callable[[], Job]) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event job = weak_job() if job is not None: with capture_internal_exceptions(): extra = event.setdefault("extra", {}) - extra["rq-job"] = { + rq_job = { "job_id": job.id, "func": job.func_name, "args": job.args, @@ -140,9 +140,11 @@ def event_processor(event, hint): } if job.enqueued_at: - extra["rq-job"]["enqueued_at"] = format_timestamp(job.enqueued_at) + rq_job["enqueued_at"] = format_timestamp(job.enqueued_at) if job.started_at: - extra["rq-job"]["started_at"] = format_timestamp(job.started_at) + rq_job["started_at"] = format_timestamp(job.started_at) + + extra["rq-job"] = rq_job if "exc_info" in hint: with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/spark/spark_worker.py b/sentry_sdk/integrations/spark/spark_worker.py index cd4eb0f28b..632e870973 100644 --- a/sentry_sdk/integrations/spark/spark_worker.py +++ b/sentry_sdk/integrations/spark/spark_worker.py @@ -58,7 +58,7 @@ def _capture_exception(exc_info, hub): if rv: rv.reverse() hint = event_hint_with_exc_info(exc_info) - event = {"level": "error", 
"exception": {"values": rv}} + event = {"level": "error", "exception": {"values": rv}} # type: Event _tag_task_context() diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index ed95c757f1..79bb18aa78 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -32,6 +32,7 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Tuple from sentry_sdk.scope import Scope as SentryScope + from sentry_sdk._types import Event try: import starlette # type: ignore @@ -407,9 +408,9 @@ async def _sentry_async_func(*args, **kwargs): info = await extractor.extract_request_info() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event # Add info from request to event request_info = event.get("request", {}) @@ -455,9 +456,9 @@ def _sentry_sync_func(*args, **kwargs): cookies = extractor.extract_cookies_from_request() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # Extract information from request request_info = event.get("request", {}) diff --git a/sentry_sdk/integrations/starlite.py b/sentry_sdk/integrations/starlite.py index 3900ce8c8a..070675c2e7 100644 --- a/sentry_sdk/integrations/starlite.py +++ b/sentry_sdk/integrations/starlite.py @@ -219,7 +219,11 @@ def event_processor(event: "Event", _: "Dict[str, Any]") -> "Event": tx_info = {"source": TRANSACTION_SOURCE_ROUTE} event.update( - request=request_info, transaction=tx_name, transaction_info=tx_info + { + "request": request_info, + "transaction": tx_name, + "transaction_info": tx_info, + } ) return event diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index a5c3bfb2ae..0a17834a40 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -39,7 +39,7 @@ "name": platform.python_implementation(), "version": "%s.%s.%s" % (sys.version_info[:3]), "build": sys.version, -} +} # type: dict[str, object] class StdlibIntegration(Integration): diff --git a/sentry_sdk/integrations/strawberry.py b/sentry_sdk/integrations/strawberry.py index 8f4314f663..3d450e0692 100644 --- a/sentry_sdk/integrations/strawberry.py +++ b/sentry_sdk/integrations/strawberry.py @@ -29,11 +29,11 @@ raise DidNotEnable("strawberry-graphql is not installed") if TYPE_CHECKING: - from typing import Any, Callable, Dict, Generator, List, Optional + from typing import Any, Callable, Generator, List, Optional from graphql import GraphQLError, GraphQLResolveInfo # type: ignore from strawberry.http import GraphQLHTTPResponse from strawberry.types import ExecutionContext, ExecutionResult # type: ignore - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor ignore_logger("strawberry.execution") @@ -349,21 +349,21 @@ def _make_request_event_processor(execution_context): # type: (ExecutionContext) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with 
capture_internal_exceptions(): if _should_send_default_pii(): request_data = event.setdefault("request", {}) request_data["api_target"] = "graphql" if not request_data.get("data"): - request_data["data"] = {"query": execution_context.query} + data = {"query": execution_context.query} if execution_context.variables: - request_data["data"]["variables"] = execution_context.variables + data["variables"] = execution_context.variables if execution_context.operation_name: - request_data["data"][ - "operationName" - ] = execution_context.operation_name + data["operationName"] = execution_context.operation_name + + request_data["data"] = data else: try: @@ -380,7 +380,7 @@ def _make_response_event_processor(response_data): # type: (GraphQLHTTPResponse) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii(): contexts = event.setdefault("contexts", {}) diff --git a/sentry_sdk/integrations/tornado.py b/sentry_sdk/integrations/tornado.py index 8af93c47f3..c6f7700f12 100644 --- a/sentry_sdk/integrations/tornado.py +++ b/sentry_sdk/integrations/tornado.py @@ -41,7 +41,7 @@ from typing import Callable from typing import Generator - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor class TornadoIntegration(Integration): @@ -155,7 +155,7 @@ def _capture_exception(ty, value, tb): def _make_event_processor(weak_handler): # type: (Callable[[], RequestHandler]) -> EventProcessor def tornado_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event handler = weak_handler() if handler is None: return event @@ -164,7 +164,7 @@ def tornado_processor(event, hint): with capture_internal_exceptions(): method = getattr(handler, handler.request.method.lower()) - event["transaction"] = transaction_from_function(method) + event["transaction"] = transaction_from_function(method) or "" event["transaction_info"] = {"source": TRANSACTION_SOURCE_COMPONENT} with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index 0d53766efb..e7fd0da66d 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py @@ -27,7 +27,7 @@ from typing import Protocol from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor WsgiResponseIter = TypeVar("WsgiResponseIter") WsgiResponseHeaders = TypeVar("WsgiResponseHeaders") @@ -254,7 +254,7 @@ def _make_wsgi_event_processor(environ, use_x_forwarded_for): headers = _filter_headers(dict(_get_headers(environ))) def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event with capture_internal_exceptions(): # if the code below fails halfway through we at least have some data request_info = event.setdefault("request", {}) diff --git a/sentry_sdk/metrics.py b/sentry_sdk/metrics.py index b52e30b6b9..b59cf033ec 100644 --- a/sentry_sdk/metrics.py +++ b/sentry_sdk/metrics.py @@ -55,7 +55,7 @@ _in_metrics = ContextVar("in_metrics", default=False) _sanitize_key = partial(re.compile(r"[^a-zA-Z0-9_/.-]+").sub, "_") -_sanitize_value = partial(re.compile(r"[^\w\d_:/@\.{}\[\]$-]+", re.UNICODE).sub, "_") +_sanitize_value = partial(re.compile(r"[^\w\d\s_:/@\.{}\[\]$-]+", re.UNICODE).sub, "") _set = set # set is 
shadowed below GOOD_TRANSACTION_SOURCES = frozenset( diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index be954b2a2c..4fa3e481ae 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -42,6 +42,8 @@ from sentry_sdk.utils import ( capture_internal_exception, filename_for_module, + get_current_thread_meta, + is_gevent, is_valid_sample_rate, logger, nanosecond_time, @@ -62,7 +64,7 @@ from typing_extensions import TypedDict import sentry_sdk.tracing - from sentry_sdk._types import SamplingContext, ProfilerMode + from sentry_sdk._types import Event, SamplingContext, ProfilerMode ThreadId = str @@ -126,32 +128,16 @@ try: - from gevent import get_hub as get_gevent_hub # type: ignore - from gevent.monkey import get_original, is_module_patched # type: ignore + from gevent.monkey import get_original # type: ignore from gevent.threadpool import ThreadPool # type: ignore thread_sleep = get_original("time", "sleep") except ImportError: - - def get_gevent_hub(): - # type: () -> Any - return None - thread_sleep = time.sleep - def is_module_patched(*args, **kwargs): - # type: (*Any, **Any) -> bool - # unable to import from gevent means no modules have been patched - return False - ThreadPool = None -def is_gevent(): - # type: () -> bool - return is_module_patched("threading") or is_module_patched("_thread") - - _scheduler = None # type: Optional[Scheduler] # The default sampling frequency to use. This is set at 101 in order to @@ -389,52 +375,6 @@ def get_frame_name(frame): MAX_PROFILE_DURATION_NS = int(3e10) # 30 seconds -def get_current_thread_id(thread=None): - # type: (Optional[threading.Thread]) -> Optional[int] - """ - Try to get the id of the current thread, with various fall backs. - """ - - # if a thread is specified, that takes priority - if thread is not None: - try: - thread_id = thread.ident - if thread_id is not None: - return thread_id - except AttributeError: - pass - - # if the app is using gevent, we should look at the gevent hub first - # as the id there differs from what the threading module reports - if is_gevent(): - gevent_hub = get_gevent_hub() - if gevent_hub is not None: - try: - # this is undocumented, so wrap it in try except to be safe - return gevent_hub.thread_ident - except AttributeError: - pass - - # use the current thread's id if possible - try: - current_thread_id = threading.current_thread().ident - if current_thread_id is not None: - return current_thread_id - except AttributeError: - pass - - # if we can't get the current thread id, fall back to the main thread id - try: - main_thread_id = threading.main_thread().ident - if main_thread_id is not None: - return main_thread_id - except AttributeError: - pass - - # we've tried everything, time to give up - return None - - class Profile(object): def __init__( self, @@ -456,7 +396,7 @@ def __init__( # Various framework integrations are capable of overwriting the active thread id. # If it is set to `None` at the end of the profile, we fall back to the default. 
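The gevent helpers and `get_current_thread_id()` removed above are not deleted outright: they move into `sentry_sdk.utils`, where the renamed `get_current_thread_meta()` (added later in this diff) returns a `(thread_id, thread_name)` tuple instead of a bare id. A minimal sketch of the call-site migration, with variable names chosen purely for illustration:

    from sentry_sdk.utils import get_current_thread_meta

    # old API: thread_id = get_current_thread_id() or 0
    thread_id, thread_name = get_current_thread_meta()
    default_active_thread_id = thread_id or 0  # fall back to 0 when the id is unknown
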
- self._default_active_thread_id = get_current_thread_id() or 0 # type: int + self._default_active_thread_id = get_current_thread_meta()[0] or 0 # type: int self.active_thread_id = None # type: Optional[int] try: @@ -479,7 +419,7 @@ def __init__( def update_active_thread_id(self): # type: () -> None - self.active_thread_id = get_current_thread_id() + self.active_thread_id = get_current_thread_meta()[0] logger.debug( "[Profiling] updating active thread id to {tid}".format( tid=self.active_thread_id @@ -673,7 +613,7 @@ def process(self): } def to_json(self, event_opt, options): - # type: (Any, Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Dict[str, Any] profile = self.process() set_in_app_in_frames( diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index b0dcca8b15..cd974e4a52 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -33,6 +33,8 @@ ) if TYPE_CHECKING: + from collections.abc import MutableMapping + from typing import Any from typing import Callable from typing import Deque @@ -53,6 +55,7 @@ EventProcessor, ExcInfo, Hint, + LogLevelStr, Type, ) @@ -414,15 +417,15 @@ def iter_trace_propagation_headers(self, *args, **kwargs): def clear(self): # type: () -> None """Clears the entire scope.""" - self._level = None # type: Optional[str] + self._level = None # type: Optional[LogLevelStr] self._fingerprint = None # type: Optional[List[str]] self._transaction = None # type: Optional[str] - self._transaction_info = {} # type: Dict[str, str] + self._transaction_info = {} # type: MutableMapping[str, str] self._user = None # type: Optional[Dict[str, Any]] self._tags = {} # type: Dict[str, Any] self._contexts = {} # type: Dict[str, Dict[str, Any]] - self._extras = {} # type: Dict[str, Any] + self._extras = {} # type: MutableMapping[str, Any] self._attachments = [] # type: List[Attachment] self.clear_breadcrumbs() @@ -438,13 +441,28 @@ def clear(self): @_attr_setter def level(self, value): - # type: (Optional[str]) -> None - """When set this overrides the level. Deprecated in favor of set_level.""" + # type: (LogLevelStr) -> None + """ + When set this overrides the level. + + .. deprecated:: 1.0.0 + Use :func:`set_level` instead. + + :param value: The level to set. + """ + logger.warning( + "Deprecated: use .set_level() instead. This will be removed in the future." + ) + self._level = value def set_level(self, value): - # type: (Optional[str]) -> None - """Sets the level for the scope.""" + # type: (LogLevelStr) -> None + """ + Sets the level for the scope. + + :param value: The level to set. + """ self._level = value @_attr_setter @@ -552,20 +570,24 @@ def profile(self, profile): self._profile = profile - def set_tag( - self, - key, # type: str - value, # type: Any - ): - # type: (...) -> None - """Sets a tag for a key to a specific value.""" + def set_tag(self, key, value): + # type: (str, Any) -> None + """ + Sets a tag for a key to a specific value. + + :param key: Key of the tag to set. + + :param value: Value of the tag to set. + """ self._tags[key] = value - def remove_tag( - self, key # type: str - ): - # type: (...) -> None - """Removes a specific tag.""" + def remove_tag(self, key): + # type: (str) -> None + """ + Removes a specific tag. + + :param key: Key of the tag to remove. + """ self._tags.pop(key, None) def set_context( @@ -574,7 +596,9 @@ def set_context( value, # type: Dict[str, Any] ): # type: (...) 
-> None
-        """Binds a context at a certain key to a specific value."""
+        """
+        Binds a context at a certain key to a specific value.
+        """
         self._contexts[key] = value

     def remove_context(
@@ -848,7 +872,7 @@ def capture_event(self, event, hint=None, client=None, scope=None, **scope_kwarg
     def capture_message(
         self, message, level=None, client=None, scope=None, **scope_kwargs
     ):
-        # type: (str, Optional[str], Optional[sentry_sdk.Client], Optional[Scope], Any) -> Optional[str]
+        # type: (str, Optional[LogLevelStr], Optional[sentry_sdk.Client], Optional[Scope], Any) -> Optional[str]
         """
         Captures a message.
@@ -876,7 +900,7 @@ def capture_message(
         event = {
             "message": message,
             "level": level,
-        }
+        }  # type: Event

         return self.capture_event(event, client=client, scope=scope, **scope_kwargs)
@@ -1079,7 +1103,7 @@ def _apply_contexts_to_event(self, event, hint, options):

         # Add "replay_id" context
         try:
-            replay_id = contexts["trace"]["dynamic_sampling_context"]["replay_id"]
+            replay_id = contexts["trace"]["dynamic_sampling_context"]["replay_id"]  # type: ignore
         except (KeyError, TypeError):
             replay_id = None
@@ -1192,7 +1216,7 @@ def update_from_scope(self, scope):
     def update_from_kwargs(
         self,
         user=None,  # type: Optional[Any]
-        level=None,  # type: Optional[str]
+        level=None,  # type: Optional[LogLevelStr]
         extras=None,  # type: Optional[Dict[str, Any]]
         contexts=None,  # type: Optional[Dict[str, Any]]
         tags=None,  # type: Optional[Dict[str, str]]
diff --git a/sentry_sdk/scrubber.py b/sentry_sdk/scrubber.py
index 838ef08b4b..3f089ab8f6 100644
--- a/sentry_sdk/scrubber.py
+++ b/sentry_sdk/scrubber.py
@@ -1,3 +1,8 @@
+try:
+    from typing import cast
+except ImportError:
+    cast = lambda _, obj: obj
+
 from sentry_sdk.utils import (
     capture_internal_exceptions,
     AnnotatedValue,
@@ -8,8 +13,6 @@
 if TYPE_CHECKING:
     from sentry_sdk._types import Event
-    from typing import Any
-    from typing import Dict
     from typing import List
     from typing import Optional
@@ -59,19 +62,46 @@


 class EventScrubber(object):
-    def __init__(self, denylist=None):
-        # type: (Optional[List[str]]) -> None
+    def __init__(self, denylist=None, recursive=False):
+        # type: (Optional[List[str]], bool) -> None
         self.denylist = DEFAULT_DENYLIST if denylist is None else denylist
         self.denylist = [x.lower() for x in self.denylist]
+        self.recursive = recursive
+
+    def scrub_list(self, lst):
+        # type: (object) -> None
+        """
+        If a list is passed to this method, the method recursively searches the list and any
+        nested lists for any dictionaries. The method calls scrub_dict on all dictionaries
+        it finds.
+        If the parameter passed to this method is not a list, the method does nothing.
+        """
+        if not isinstance(lst, list):
+            return
+
+        for v in lst:
+            self.scrub_dict(v)  # no-op unless v is a dict
+            self.scrub_list(v)  # no-op unless v is a list

     def scrub_dict(self, d):
-        # type: (Dict[str, Any]) -> None
+        # type: (object) -> None
+        """
+        If a dictionary is passed to this method, the method scrubs the dictionary of any
+        sensitive data. The method calls itself recursively on any nested dictionaries
+        (including dictionaries nested in lists) if self.recursive is True.
+        This method does nothing if the parameter passed to it is not a dictionary.
+        """
         if not isinstance(d, dict):
             return

-        for k in d.keys():
-            if isinstance(k, string_types) and k.lower() in self.denylist:
+        for k, v in d.items():
+            # The cast is needed because mypy is not smart enough to figure out that k must be a
+            # string after the isinstance check.
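Together with `scrub_list()`, the new `recursive` flag lets the scrubber walk nested dictionaries and lists instead of only top-level keys. A minimal usage sketch, assuming the SDK's existing `event_scrubber` init option; the extra denylist entry is hypothetical:

    import sentry_sdk
    from sentry_sdk.scrubber import EventScrubber, DEFAULT_DENYLIST

    sentry_sdk.init(
        dsn="...",
        event_scrubber=EventScrubber(
            denylist=DEFAULT_DENYLIST + ["my_secret_field"],  # hypothetical extra key
            recursive=True,  # also scrub nested dicts/lists
        ),
    )
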
+ if isinstance(k, string_types) and cast(str, k).lower() in self.denylist: d[k] = AnnotatedValue.substituted_because_contains_sensitive_data() + elif self.recursive: + self.scrub_dict(v) # no-op unless v is a dict + self.scrub_list(v) # no-op unless v is a list def scrub_request(self, event): # type: (Event) -> None diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 80e9ace939..7afe7e0944 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -5,7 +5,12 @@ import sentry_sdk from sentry_sdk.consts import INSTRUMENTER -from sentry_sdk.utils import is_valid_sample_rate, logger, nanosecond_time +from sentry_sdk.utils import ( + get_current_thread_meta, + is_valid_sample_rate, + logger, + nanosecond_time, +) from sentry_sdk._compat import datetime_utcnow, utc_from_timestamp, PY2 from sentry_sdk.consts import SPANDATA from sentry_sdk._types import TYPE_CHECKING @@ -14,7 +19,7 @@ if TYPE_CHECKING: import typing - from collections.abc import Callable + from collections.abc import Callable, MutableMapping from typing import Any from typing import Dict from typing import Iterator @@ -151,7 +156,7 @@ def __init__( self.description = description self.status = status self.hub = hub - self._tags = {} # type: Dict[str, str] + self._tags = {} # type: MutableMapping[str, str] self._data = {} # type: Dict[str, Any] self._containing_transaction = containing_transaction if start_timestamp is None: @@ -172,6 +177,9 @@ def __init__( self._span_recorder = None # type: Optional[_SpanRecorder] self._local_aggregator = None # type: Optional[LocalAggregator] + thread_id, thread_name = get_current_thread_meta() + self.set_thread(thread_id, thread_name) + # TODO this should really live on the Transaction class rather than the Span # class def init_span_recorder(self, maxlen): @@ -418,6 +426,15 @@ def set_status(self, value): # type: (str) -> None self.status = value + def set_thread(self, thread_id, thread_name): + # type: (Optional[int], Optional[str]) -> None + + if thread_id is not None: + self.set_data(SPANDATA.THREAD_ID, str(thread_id)) + + if thread_name is not None: + self.set_data(SPANDATA.THREAD_NAME, thread_name) + def set_http_status(self, http_status): # type: (int) -> None self.set_tag( diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index 8eb00bed12..9ea9cd0c98 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -1,18 +1,18 @@ from __future__ import print_function import io -import urllib3 -import certifi import gzip +import socket import time - from datetime import timedelta from collections import defaultdict +import urllib3 +import certifi + from sentry_sdk.utils import Dsn, logger, capture_internal_exceptions, json_dumps from sentry_sdk.worker import BackgroundWorker from sentry_sdk.envelope import Envelope, Item, PayloadRef - from sentry_sdk._compat import datetime_utcnow from sentry_sdk._types import TYPE_CHECKING @@ -22,6 +22,7 @@ from typing import Callable from typing import Dict from typing import Iterable + from typing import List from typing import Optional from typing import Tuple from typing import Type @@ -41,6 +42,21 @@ from urllib import getproxies # type: ignore +KEEP_ALIVE_SOCKET_OPTIONS = [] +for option in [ + (socket.SOL_SOCKET, lambda: getattr(socket, "SO_KEEPALIVE"), 1), # noqa: B009 + (socket.SOL_TCP, lambda: getattr(socket, "TCP_KEEPIDLE"), 45), # noqa: B009 + (socket.SOL_TCP, lambda: getattr(socket, "TCP_KEEPINTVL"), 10), # noqa: B009 + (socket.SOL_TCP, lambda: getattr(socket, "TCP_KEEPCNT"), 6), # noqa: B009 +]: + try: 
+ KEEP_ALIVE_SOCKET_OPTIONS.append((option[0], option[1](), option[2])) + except AttributeError: + # a specific option might not be available on specific systems, + # e.g. TCP_KEEPIDLE doesn't exist on macOS + pass + + class Transport(object): """Baseclass for all transports. @@ -441,12 +457,31 @@ def _send_envelope( def _get_pool_options(self, ca_certs): # type: (Optional[Any]) -> Dict[str, Any] - return { + options = { "num_pools": self._num_pools, "cert_reqs": "CERT_REQUIRED", "ca_certs": ca_certs or certifi.where(), } + socket_options = None # type: Optional[List[Tuple[int, int, int | bytes]]] + + if self.options["socket_options"] is not None: + socket_options = self.options["socket_options"] + + if self.options["keep_alive"]: + if socket_options is None: + socket_options = [] + + used_options = {(o[0], o[1]) for o in socket_options} + for default_option in KEEP_ALIVE_SOCKET_OPTIONS: + if (default_option[0], default_option[1]) not in used_options: + socket_options.append(default_option) + + if socket_options is not None: + options["socket_options"] = socket_options + + return options + def _in_no_proxy(self, parsed_dsn): # type: (Dsn) -> bool no_proxy = getproxies().get("no") diff --git a/sentry_sdk/types.py b/sentry_sdk/types.py new file mode 100644 index 0000000000..9a96ed489f --- /dev/null +++ b/sentry_sdk/types.py @@ -0,0 +1,21 @@ +""" +This module contains type definitions for the Sentry SDK's public API. +The types are re-exported from the internal module `sentry_sdk._types`. + +Disclaimer: Since types are a form of documentation, type definitions +may change in minor releases. Removing a type would be considered a +breaking change, and so we will only remove type definitions in major +releases. +""" + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from sentry_sdk._types import Event, Hint +else: + # The lines below allow the types to be imported from outside `if TYPE_CHECKING` + # guards. The types in this module are only intended to be used for type hints. + Event = None + Hint = None + +__all__ = ("Event", "Hint") diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 7c10d7cf43..a64b4b4d98 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -75,7 +75,7 @@ Union, ) - from sentry_sdk._types import EndpointType, ExcInfo + from sentry_sdk._types import EndpointType, Event, ExcInfo epoch = datetime(1970, 1, 1) @@ -975,7 +975,7 @@ def to_string(value): def iter_event_stacktraces(event): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Event) -> Iterator[Dict[str, Any]] if "stacktrace" in event: yield event["stacktrace"] if "threads" in event: @@ -989,14 +989,14 @@ def iter_event_stacktraces(event): def iter_event_frames(event): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Event) -> Iterator[Dict[str, Any]] for stacktrace in iter_event_stacktraces(event): for frame in stacktrace.get("frames") or (): yield frame def handle_in_app(event, in_app_exclude=None, in_app_include=None, project_root=None): - # type: (Dict[str, Any], Optional[List[str]], Optional[List[str]], Optional[str]) -> Dict[str, Any] + # type: (Event, Optional[List[str]], Optional[List[str]], Optional[str]) -> Event for stacktrace in iter_event_stacktraces(event): set_in_app_in_frames( stacktrace.get("frames"), @@ -1074,7 +1074,7 @@ def event_from_exception( client_options=None, # type: Optional[Dict[str, Any]] mechanism=None, # type: Optional[Dict[str, Any]] ): - # type: (...) -> Tuple[Dict[str, Any], Dict[str, Any]] + # type: (...) 
-> Tuple[Event, Dict[str, Any]]
     exc_info = exc_info_from_error(exc_info)
     hint = event_hint_with_exc_info(exc_info)
     return (
@@ -1746,9 +1746,14 @@ def now():

 try:
+    from gevent import get_hub as get_gevent_hub
     from gevent.monkey import is_module_patched
 except ImportError:

+    def get_gevent_hub():
+        # type: () -> Any
+        return None
+
     def is_module_patched(*args, **kwargs):
         # type: (*Any, **Any) -> bool
         # unable to import from gevent means no modules have been patched
@@ -1758,3 +1763,54 @@ def is_module_patched(*args, **kwargs):
 def is_gevent():
     # type: () -> bool
     return is_module_patched("threading") or is_module_patched("_thread")
+
+
+def get_current_thread_meta(thread=None):
+    # type: (Optional[threading.Thread]) -> Tuple[Optional[int], Optional[str]]
+    """
+    Try to get the id and name of the current thread, with various fallbacks.
+    """
+
+    # if a thread is specified, that takes priority
+    if thread is not None:
+        try:
+            thread_id = thread.ident
+            thread_name = thread.name
+            if thread_id is not None:
+                return thread_id, thread_name
+        except AttributeError:
+            pass
+
+    # if the app is using gevent, we should look at the gevent hub first
+    # as the id there differs from what the threading module reports
+    if is_gevent():
+        gevent_hub = get_gevent_hub()
+        if gevent_hub is not None:
+            try:
+                # this is undocumented, so wrap it in try except to be safe
+                return gevent_hub.thread_ident, None
+            except AttributeError:
+                pass
+
+    # use the current thread's id if possible
+    try:
+        thread = threading.current_thread()
+        thread_id = thread.ident
+        thread_name = thread.name
+        if thread_id is not None:
+            return thread_id, thread_name
+    except AttributeError:
+        pass
+
+    # if we can't get the current thread id, fall back to the main thread id
+    try:
+        thread = threading.main_thread()
+        thread_id = thread.ident
+        thread_name = thread.name
+        if thread_id is not None:
+            return thread_id, thread_name
+    except AttributeError:
+        pass
+
+    # we've tried everything, time to give up
+    return None, None
diff --git a/setup.py b/setup.py
index ef268c49c9..ff90fae92e 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ def get_file_text(file_name):

 setup(
     name="sentry-sdk",
-    version="1.40.6",
+    version="1.44.0",
     author="Sentry Team and Contributors",
     author_email="hello@sentry.io",
     url="https://github.com/getsentry/sentry-python",
@@ -50,6 +50,7 @@ def get_file_text(file_name):
         "beam": ["apache-beam>=2.12"],
         "bottle": ["bottle>=0.12.13"],
         "celery": ["celery>=3"],
+        "celery-redbeat": ["celery-redbeat>=2"],
         "chalice": ["chalice>=1.16.0"],
         "clickhouse-driver": ["clickhouse-driver>=0.2.0"],
         "django": ["django>=1.8"],
@@ -60,6 +61,7 @@ def get_file_text(file_name):
         "httpx": ["httpx>=0.16.0"],
         "huey": ["huey>=2"],
         "loguru": ["loguru>=0.5"],
+        "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"],
         "opentelemetry": ["opentelemetry-distro>=0.35b0"],
         "opentelemetry-experimental": [
             "opentelemetry-distro~=0.40b0",
diff --git a/tests/conftest.py b/tests/conftest.py
index 85c65462cb..c87111cbf7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -652,3 +652,15 @@ def patch_start_tracing_child(fake_transaction_is_none=False):
         return_value=fake_transaction,
     ):
         yield fake_start_child
+
+
+class ApproxDict(dict):
+    def __eq__(self, other):
+        # For an ApproxDict to equal another dict, the other dict just needs to contain
+        # all the keys from the ApproxDict with the same values.
+        #
+        # The other dict may contain additional keys with any value.
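A quick sketch of the intended matching semantics, with hypothetical values:

    data = {"http.method": "GET", "thread.id": "123", "thread.name": "MainThread"}
    assert data == ApproxDict({"http.method": "GET"})   # extra keys are ignored
    assert data != ApproxDict({"http.method": "POST"})  # differing values still fail
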
+        return all(key in other and other[key] == value for key, value in self.items())
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
diff --git a/tests/integrations/aiohttp/test_aiohttp.py b/tests/integrations/aiohttp/test_aiohttp.py
index de5cf19f44..90ca466175 100644
--- a/tests/integrations/aiohttp/test_aiohttp.py
+++ b/tests/integrations/aiohttp/test_aiohttp.py
@@ -9,6 +9,7 @@

 from sentry_sdk import capture_message, start_transaction
 from sentry_sdk.integrations.aiohttp import AioHttpIntegration
+from tests.conftest import ApproxDict

 try:
     from unittest import mock  # python 3.3 and above
@@ -495,15 +496,17 @@ async def handler(request):
     crumb = event["breadcrumbs"]["values"][0]
     assert crumb["type"] == "http"
     assert crumb["category"] == "httplib"
-    assert crumb["data"] == {
-        "url": "http://127.0.0.1:{}/".format(raw_server.port),
-        "http.fragment": "",
-        "http.method": "GET",
-        "http.query": "",
-        "http.response.status_code": 200,
-        "reason": "OK",
-        "extra": "foo",
-    }
+    assert crumb["data"] == ApproxDict(
+        {
+            "url": "http://127.0.0.1:{}/".format(raw_server.port),
+            "http.fragment": "",
+            "http.method": "GET",
+            "http.query": "",
+            "http.response.status_code": 200,
+            "reason": "OK",
+            "extra": "foo",
+        }
+    )


 @pytest.mark.asyncio
diff --git a/tests/integrations/asyncpg/test_asyncpg.py b/tests/integrations/asyncpg/test_asyncpg.py
index a839031c3b..611d8ea9d9 100644
--- a/tests/integrations/asyncpg/test_asyncpg.py
+++ b/tests/integrations/asyncpg/test_asyncpg.py
@@ -34,6 +34,7 @@
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.tracing_utils import record_sql_queries
 from sentry_sdk._compat import contextmanager
+from tests.conftest import ApproxDict

 try:
     from unittest import mock
@@ -46,13 +47,15 @@
 )
 CRUMBS_CONNECT = {
     "category": "query",
-    "data": {
-        "db.name": PG_NAME,
-        "db.system": "postgresql",
-        "db.user": PG_USER,
-        "server.address": PG_HOST,
-        "server.port": PG_PORT,
-    },
+    "data": ApproxDict(
+        {
+            "db.name": PG_NAME,
+            "db.system": "postgresql",
+            "db.user": PG_USER,
+            "server.address": PG_HOST,
+            "server.port": PG_PORT,
+        }
+    ),
     "message": "connect",
     "type": "default",
 }
diff --git a/tests/integrations/aws_lambda/client.py b/tests/integrations/aws_lambda/client.py
index 265ce6a520..298ebd920d 100644
--- a/tests/integrations/aws_lambda/client.py
+++ b/tests/integrations/aws_lambda/client.py
@@ -240,7 +240,7 @@ def run_lambda_function(
             FunctionName=full_fn_name,
         )
         print(
-            f"Lambda function {full_fn_name} in AWS already existing, taking it (and do not create a local one)"
+            "Lambda function already exists in AWS, using it (not creating a local one)"
         )
     except client.exceptions.ResourceNotFoundException:
         function_exists_in_aws = False
@@ -251,14 +251,9 @@ def run_lambda_function(
     dir_already_existing = os.path.isdir(base_dir)

     if dir_already_existing:
-        print(
-            f"Local Lambda function directory ({base_dir}) already exists, skipping creation"
-        )
+        print("Local Lambda function directory already exists, skipping creation")

     if not dir_already_existing:
-        print(
-            f"Creating Lambda function package ({full_fn_name}) locally in directory {base_dir}"
-        )
         os.mkdir(base_dir)
         _create_lambda_package(
             base_dir, code, initial_handler, layer, syntax_check, subprocess_kwargs
@@ -321,10 +316,9 @@ def clean_up():
         waiter = client.get_waiter("function_active_v2")
         waiter.wait(FunctionName=full_fn_name)
-        print(f"Created Lambda function in AWS: {full_fn_name}")
     except client.exceptions.ResourceConflictException:
         print(
-            f"Lambda function ({full_fn_name}) already existing
in AWS, this is fine, we will just invoke it." + "Lambda function already exists, this is fine, we will just invoke it." ) response = client.invoke( diff --git a/tests/integrations/aws_lambda/test_aws.py b/tests/integrations/aws_lambda/test_aws.py index 6f51ad14da..5f2dba132d 100644 --- a/tests/integrations/aws_lambda/test_aws.py +++ b/tests/integrations/aws_lambda/test_aws.py @@ -489,6 +489,7 @@ def test_handler(event, context): True, 2, ), + (b"[]", False, 1), ], ) def test_non_dict_event( @@ -661,6 +662,9 @@ def test_handler(event, context): assert response["Payload"]["AssertionError raised"] is False +@pytest.mark.xfail( + reason="The limited log output we depend on is being clogged by a new warning" +) def test_serverless_no_code_instrumentation(run_lambda_function): """ Test that ensures that just by adding a lambda layer containing the @@ -705,6 +709,9 @@ def test_handler(event, context): assert "sentry_handler" in response["LogResult"][3].decode("utf-8") +@pytest.mark.xfail( + reason="The limited log output we depend on is being clogged by a new warning" +) def test_error_has_new_trace_context_performance_enabled(run_lambda_function): envelopes, _, _ = run_lambda_function( LAMBDA_PRELUDE @@ -767,6 +774,9 @@ def test_handler(event, context): ) +@pytest.mark.xfail( + reason="The limited log output we depend on is being clogged by a new warning" +) def test_error_has_existing_trace_context_performance_enabled(run_lambda_function): trace_id = "471a43a4192642f0b136d5159a501701" parent_span_id = "6e8f22c393e68f19" diff --git a/tests/integrations/boto3/test_s3.py b/tests/integrations/boto3/test_s3.py index 5812c2c1bb..8c05b72a3e 100644 --- a/tests/integrations/boto3/test_s3.py +++ b/tests/integrations/boto3/test_s3.py @@ -4,6 +4,7 @@ from sentry_sdk import Hub from sentry_sdk.integrations.boto3 import Boto3Integration +from tests.conftest import ApproxDict from tests.integrations.boto3.aws_mock import MockResponse from tests.integrations.boto3 import read_fixture @@ -65,12 +66,14 @@ def test_streaming(sentry_init, capture_events): span1 = event["spans"][0] assert span1["op"] == "http.client" assert span1["description"] == "aws.s3.GetObject" - assert span1["data"] == { - "http.method": "GET", - "aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf", - "http.fragment": "", - "http.query": "", - } + assert span1["data"] == ApproxDict( + { + "http.method": "GET", + "aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf", + "http.fragment": "", + "http.query": "", + } + ) span2 = event["spans"][1] assert span2["op"] == "http.client.stream" @@ -123,7 +126,13 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): transaction.finish() (event,) = events - assert event["spans"][0]["data"] == { - "http.method": "GET", - # no url data - } + assert event["spans"][0]["data"] == ApproxDict( + { + "http.method": "GET", + # no url data + } + ) + + assert "aws.request.url" not in event["spans"][0]["data"] + assert "http.fragment" not in event["spans"][0]["data"] + assert "http.query" not in event["spans"][0]["data"] diff --git a/tests/integrations/celery/test_celery.py b/tests/integrations/celery/test_celery.py index 0d44ee992e..c6eb55536c 100644 --- a/tests/integrations/celery/test_celery.py +++ b/tests/integrations/celery/test_celery.py @@ -10,6 +10,7 @@ ) from sentry_sdk._compat import text_type +from tests.conftest import ApproxDict from celery import Celery, VERSION from celery.bin import worker @@ -218,6 +219,7 @@ def dummy_task(x, y): assert execution_event["spans"] == 
[] assert submission_event["spans"] == [ { + "data": ApproxDict(), "description": "dummy_task", "op": "queue.submit.celery", "parent_span_id": submission_event["contexts"]["trace"]["span_id"], diff --git a/tests/integrations/celery/test_celery_beat_crons.py b/tests/integrations/celery/test_celery_beat_crons.py index 9343b3c926..9ffa59b00d 100644 --- a/tests/integrations/celery/test_celery_beat_crons.py +++ b/tests/integrations/celery/test_celery_beat_crons.py @@ -8,6 +8,7 @@ _get_humanized_interval, _get_monitor_config, _patch_beat_apply_entry, + _patch_redbeat_maybe_due, crons_task_success, crons_task_failure, crons_task_retry, @@ -447,3 +448,56 @@ def test_exclude_beat_tasks_option( # The original Scheduler.apply_entry() is called, AND _get_monitor_config is called. assert fake_apply_entry.call_count == 1 assert _get_monitor_config.call_count == 1 + + +@pytest.mark.parametrize( + "task_name,exclude_beat_tasks,task_in_excluded_beat_tasks", + [ + ["some_task_name", ["xxx", "some_task.*"], True], + ["some_task_name", ["xxx", "some_other_task.*"], False], + ], +) +def test_exclude_redbeat_tasks_option( + task_name, exclude_beat_tasks, task_in_excluded_beat_tasks +): + """ + Test excluding Celery RedBeat tasks from automatic instrumentation. + """ + fake_maybe_due = MagicMock() + + fake_redbeat_scheduler = MagicMock() + fake_redbeat_scheduler.maybe_due = fake_maybe_due + + fake_integration = MagicMock() + fake_integration.exclude_beat_tasks = exclude_beat_tasks + + fake_schedule_entry = MagicMock() + fake_schedule_entry.name = task_name + + fake_get_monitor_config = MagicMock() + + with mock.patch( + "sentry_sdk.integrations.celery.RedBeatScheduler", fake_redbeat_scheduler + ) as RedBeatScheduler: # noqa: N806 + with mock.patch( + "sentry_sdk.integrations.celery.Hub.current.get_integration", + return_value=fake_integration, + ): + with mock.patch( + "sentry_sdk.integrations.celery._get_monitor_config", + fake_get_monitor_config, + ) as _get_monitor_config: + # Mimic CeleryIntegration patching of RedBeatScheduler.maybe_due() + _patch_redbeat_maybe_due() + # Mimic Celery RedBeat calling a task from the RedBeat schedule + RedBeatScheduler.maybe_due(fake_redbeat_scheduler, fake_schedule_entry) + + if task_in_excluded_beat_tasks: + # Only the original RedBeatScheduler.maybe_due() is called, _get_monitor_config is NOT called. + assert fake_maybe_due.call_count == 1 + _get_monitor_config.assert_not_called() + + else: + # The original RedBeatScheduler.maybe_due() is called, AND _get_monitor_config is called. 
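Like the existing beat-scheduler patch, the RedBeat patch honors the integration's exclusion patterns. A minimal configuration sketch, assuming the integration's `monitor_beat_tasks` option; the task-name patterns are hypothetical:

    import sentry_sdk
    from sentry_sdk.integrations.celery import CeleryIntegration

    sentry_sdk.init(
        dsn="...",
        integrations=[
            CeleryIntegration(
                monitor_beat_tasks=True,
                exclude_beat_tasks=["some_task_name", "payment-check-.*"],
            )
        ],
    )
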
+ assert fake_maybe_due.call_count == 1 + assert _get_monitor_config.call_count == 1 diff --git a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py index 74a04fac44..b39f722c52 100644 --- a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py +++ b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py @@ -10,6 +10,7 @@ from sentry_sdk import start_transaction, capture_message from sentry_sdk.integrations.clickhouse_driver import ClickhouseDriverIntegration +from tests.conftest import ApproxDict EXPECT_PARAMS_IN_SELECT = True if clickhouse_driver.VERSION < (0, 2, 6): @@ -102,6 +103,9 @@ def test_clickhouse_client_breadcrumbs(sentry_init, capture_events) -> None: if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -201,6 +205,9 @@ def test_clickhouse_client_breadcrumbs_with_pii(sentry_init, capture_events) -> if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -313,6 +320,9 @@ def test_clickhouse_client_spans( if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) @@ -434,6 +444,9 @@ def test_clickhouse_client_spans_with_pii( if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) @@ -529,6 +542,9 @@ def test_clickhouse_dbapi_breadcrumbs(sentry_init, capture_events) -> None: if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -629,6 +645,9 @@ def test_clickhouse_dbapi_breadcrumbs_with_pii(sentry_init, capture_events) -> N if not EXPECT_PARAMS_IN_SELECT: expected_breadcrumbs[-1]["data"].pop("db.params", None) + for crumb in expected_breadcrumbs: + crumb["data"] = ApproxDict(crumb["data"]) + for crumb in event["breadcrumbs"]["values"]: crumb.pop("timestamp", None) @@ -739,6 +758,9 @@ def test_clickhouse_dbapi_spans(sentry_init, capture_events, capture_envelopes) if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) @@ -860,6 +882,9 @@ def test_clickhouse_dbapi_spans_with_pii( if not EXPECT_PARAMS_IN_SELECT: expected_spans[-1]["data"].pop("db.params", None) + for span in expected_spans: + span["data"] = ApproxDict(span["data"]) + for span in event["spans"]: span.pop("span_id", None) span.pop("start_timestamp", None) diff --git a/tests/integrations/django/test_basic.py b/tests/integrations/django/test_basic.py index 095657fd8a..8c01c71830 100644 --- a/tests/integrations/django/test_basic.py +++ b/tests/integrations/django/test_basic.py @@ -27,7 +27,7 @@ from 
sentry_sdk.integrations.django.caching import _get_span_description from sentry_sdk.integrations.executing import ExecutingIntegration from sentry_sdk.tracing import Span -from tests.conftest import unpack_werkzeug_response +from tests.conftest import ApproxDict, unpack_werkzeug_response from tests.integrations.django.myapp.wsgi import application from tests.integrations.django.utils import pytest_mark_django_db_decorator @@ -1237,14 +1237,14 @@ def test_cache_spans_middleware( assert first_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert first_event["spans"][0]["data"] == {"cache.hit": False} + assert first_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert len(second_event["spans"]) == 2 assert second_event["spans"][0]["op"] == "cache.get_item" assert second_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert second_event["spans"][0]["data"] == {"cache.hit": False} + assert second_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert second_event["spans"][1]["op"] == "cache.get_item" assert second_event["spans"][1]["description"].startswith( @@ -1279,14 +1279,14 @@ def test_cache_spans_decorator(sentry_init, client, capture_events, use_django_c assert first_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert first_event["spans"][0]["data"] == {"cache.hit": False} + assert first_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert len(second_event["spans"]) == 2 assert second_event["spans"][0]["op"] == "cache.get_item" assert second_event["spans"][0]["description"].startswith( "get views.decorators.cache.cache_header." ) - assert second_event["spans"][0]["data"] == {"cache.hit": False} + assert second_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert second_event["spans"][1]["op"] == "cache.get_item" assert second_event["spans"][1]["description"].startswith( @@ -1323,7 +1323,7 @@ def test_cache_spans_templatetag( assert first_event["spans"][0]["description"].startswith( "get template.cache.some_identifier." 
) - assert first_event["spans"][0]["data"] == {"cache.hit": False} + assert first_event["spans"][0]["data"] == ApproxDict({"cache.hit": False}) assert len(second_event["spans"]) == 1 assert second_event["spans"][0]["op"] == "cache.get_item" diff --git a/tests/integrations/grpc/test_grpc.py b/tests/integrations/grpc/test_grpc.py index 0813d655ae..3f49c0a0f4 100644 --- a/tests/integrations/grpc/test_grpc.py +++ b/tests/integrations/grpc/test_grpc.py @@ -11,6 +11,7 @@ from sentry_sdk import Hub, start_transaction from sentry_sdk.consts import OP from sentry_sdk.integrations.grpc import GRPCIntegration +from tests.conftest import ApproxDict from tests.integrations.grpc.grpc_test_service_pb2 import gRPCTestMessage from tests.integrations.grpc.grpc_test_service_pb2_grpc import ( gRPCTestServiceServicer, @@ -151,11 +152,13 @@ def test_grpc_client_starts_span(sentry_init, capture_events_forksafe): span["description"] == "unary unary call to /grpc_test_server.gRPCTestService/TestServe" ) - assert span["data"] == { - "type": "unary unary", - "method": "/grpc_test_server.gRPCTestService/TestServe", - "code": "OK", - } + assert span["data"] == ApproxDict( + { + "type": "unary unary", + "method": "/grpc_test_server.gRPCTestService/TestServe", + "code": "OK", + } + ) @pytest.mark.forked @@ -183,10 +186,12 @@ def test_grpc_client_unary_stream_starts_span(sentry_init, capture_events_forksa span["description"] == "unary stream call to /grpc_test_server.gRPCTestService/TestUnaryStream" ) - assert span["data"] == { - "type": "unary stream", - "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", - } + assert span["data"] == ApproxDict( + { + "type": "unary stream", + "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", + } + ) # using unittest.mock.Mock not possible because grpc verifies @@ -229,11 +234,13 @@ def test_grpc_client_other_interceptor(sentry_init, capture_events_forksafe): span["description"] == "unary unary call to /grpc_test_server.gRPCTestService/TestServe" ) - assert span["data"] == { - "type": "unary unary", - "method": "/grpc_test_server.gRPCTestService/TestServe", - "code": "OK", - } + assert span["data"] == ApproxDict( + { + "type": "unary unary", + "method": "/grpc_test_server.gRPCTestService/TestServe", + "code": "OK", + } + ) @pytest.mark.forked diff --git a/tests/integrations/grpc/test_grpc_aio.py b/tests/integrations/grpc/test_grpc_aio.py index 0b8571adca..3e21188ec8 100644 --- a/tests/integrations/grpc/test_grpc_aio.py +++ b/tests/integrations/grpc/test_grpc_aio.py @@ -11,6 +11,7 @@ from sentry_sdk import Hub, start_transaction from sentry_sdk.consts import OP from sentry_sdk.integrations.grpc import GRPCIntegration +from tests.conftest import ApproxDict from tests.integrations.grpc.grpc_test_service_pb2 import gRPCTestMessage from tests.integrations.grpc.grpc_test_service_pb2_grpc import ( gRPCTestServiceServicer, @@ -161,11 +162,13 @@ async def test_grpc_client_starts_span( span["description"] == "unary unary call to /grpc_test_server.gRPCTestService/TestServe" ) - assert span["data"] == { - "type": "unary unary", - "method": "/grpc_test_server.gRPCTestService/TestServe", - "code": "OK", - } + assert span["data"] == ApproxDict( + { + "type": "unary unary", + "method": "/grpc_test_server.gRPCTestService/TestServe", + "code": "OK", + } + ) @pytest.mark.asyncio @@ -190,10 +193,12 @@ async def test_grpc_client_unary_stream_starts_span( span["description"] == "unary stream call to /grpc_test_server.gRPCTestService/TestUnaryStream" ) - assert span["data"] == { - 
"type": "unary stream", - "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", - } + assert span["data"] == ApproxDict( + { + "type": "unary stream", + "method": "/grpc_test_server.gRPCTestService/TestUnaryStream", + } + ) @pytest.mark.asyncio diff --git a/tests/integrations/httpx/test_httpx.py b/tests/integrations/httpx/test_httpx.py index e141faa282..c4ca97321c 100644 --- a/tests/integrations/httpx/test_httpx.py +++ b/tests/integrations/httpx/test_httpx.py @@ -7,6 +7,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk.consts import MATCH_ALL, SPANDATA from sentry_sdk.integrations.httpx import HttpxIntegration +from tests.conftest import ApproxDict try: from unittest import mock # python 3.3 and above @@ -46,15 +47,17 @@ def before_breadcrumb(crumb, hint): crumb = event["breadcrumbs"]["values"][0] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - "extra": "foo", - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + "extra": "foo", + } + ) @pytest.mark.parametrize( @@ -291,9 +294,15 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): capture_message("Testing!") (event,) = events - assert event["breadcrumbs"]["values"][0]["data"] == { - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - # no url related data - } + assert event["breadcrumbs"]["values"][0]["data"] == ApproxDict( + { + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + # no url related data + } + ) + + assert "url" not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_FRAGMENT not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_QUERY not in event["breadcrumbs"]["values"][0]["data"] diff --git a/tests/integrations/huey/test_huey.py b/tests/integrations/huey/test_huey.py index 0bebd91b19..48a3da97f4 100644 --- a/tests/integrations/huey/test_huey.py +++ b/tests/integrations/huey/test_huey.py @@ -172,3 +172,21 @@ def dummy_task(): assert len(event["spans"]) assert event["spans"][0]["op"] == "queue.submit.huey" assert event["spans"][0]["description"] == "different_task_name" + + +def test_huey_propagate_trace(init_huey, capture_events): + huey = init_huey() + + events = capture_events() + + @huey.task() + def propagated_trace_task(): + pass + + with start_transaction() as outer_transaction: + execute_huey_task(huey, propagated_trace_task) + + assert ( + events[0]["transaction"] == "propagated_trace_task" + ) # the "inner" transaction + assert events[0]["contexts"]["trace"]["trace_id"] == outer_transaction.trace_id diff --git a/tests/integrations/openai/__init__.py b/tests/integrations/openai/__init__.py new file mode 100644 index 0000000000..d6cc3d5505 --- /dev/null +++ b/tests/integrations/openai/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("openai") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py new file mode 100644 index 0000000000..074d859274 --- /dev/null +++ b/tests/integrations/openai/test_openai.py @@ -0,0 +1,231 @@ +import pytest +from openai import OpenAI, Stream, OpenAIError +from openai.types import CompletionUsage, CreateEmbeddingResponse, 
Embedding +from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk +from openai.types.chat.chat_completion import Choice +from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice +from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage + +from sentry_sdk import start_transaction +from sentry_sdk.integrations.openai import ( + OpenAIIntegration, + COMPLETION_TOKENS_USED, + PROMPT_TOKENS_USED, + TOTAL_TOKENS_USED, +) + +from unittest import mock # python 3.3 and above + + +EXAMPLE_CHAT_COMPLETION = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="the model response" + ), + ) + ], + created=10000000, + model="model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), +) + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "the model response" in span["data"]["ai.responses"][0]["content"] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + assert span["data"][COMPLETION_TOKENS_USED] == 10 + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 + + +# noinspection PyTypeChecker +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=client) + returned_stream._iterator = [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + + 
client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "hello world" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"][COMPLETION_TOKENS_USED] == 2 + assert span["data"][PROMPT_TOKENS_USED] == 1 + assert span["data"][TOTAL_TOKENS_USED] == 3 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + assert event["level"] == "error" + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_embeddings_create( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + + returned_embedding = CreateEmbeddingResponse( + data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], + model="some-model", + object="list", + usage=EmbeddingTokenUsage( + prompt_tokens=20, + total_tokens=30, + ), + ) + + client.embeddings._post = mock.Mock(return_value=returned_embedding) + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.embeddings.create.openai" + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0] + else: + assert "ai.input_messages" not in span["data"] + + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 diff --git a/tests/integrations/opentelemetry/test_span_processor.py b/tests/integrations/opentelemetry/test_span_processor.py index b7e5a7928d..02e3059ca8 100644 --- a/tests/integrations/opentelemetry/test_span_processor.py +++ b/tests/integrations/opentelemetry/test_span_processor.py @@ -531,3 +531,95 @@ def test_link_trace_context_to_error_event(): assert "contexts" in event assert "trace" in event["contexts"] assert event["contexts"]["trace"] == fake_trace_context + + +def test_pruning_old_spans_on_start(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + 
otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + parent_context = {} + fake_client = MagicMock() + fake_client.options = {"instrumenter": "otel"} + fake_client.dsn = "https://1234567890abcdef@o123456.ingest.sentry.io/123456" + + current_hub = MagicMock() + current_hub.client = fake_client + + fake_hub = MagicMock() + fake_hub.current = current_hub + + with mock.patch( + "sentry_sdk.integrations.opentelemetry.span_processor.Hub", fake_hub + ): + span_processor = SentrySpanProcessor() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_start(otel_span, parent_context) + assert sorted(list(span_processor.otel_span_map.keys())) == [ + "111111111abcdef", + "1234567890abcdef", + ] + assert sorted(list(span_processor.open_spans.values())) == [ + {"111111111abcdef"}, + {"1234567890abcdef"}, + ] + + +def test_pruning_old_spans_on_end(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + fake_sentry_span = MagicMock(spec=Span) + fake_sentry_span.set_context = MagicMock() + fake_sentry_span.finish = MagicMock() + + span_processor = SentrySpanProcessor() + span_processor._get_otel_context = MagicMock() + span_processor._update_span_with_otel_data = MagicMock() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + "1234567890abcdef": fake_sentry_span, # should go (because it is closed) + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes: {"1234567890abcdef"}, # should go (because it is closed) + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_end(otel_span) + assert sorted(list(span_processor.otel_span_map.keys())) == ["111111111abcdef"] + assert sorted(list(span_processor.open_spans.values())) == [{"111111111abcdef"}] diff --git a/tests/integrations/redis/asyncio/test_redis_asyncio.py b/tests/integrations/redis/asyncio/test_redis_asyncio.py index 7233b8f908..4f024a2824 100644 --- a/tests/integrations/redis/asyncio/test_redis_asyncio.py +++ b/tests/integrations/redis/asyncio/test_redis_asyncio.py @@ -3,6 +3,7 @@ from sentry_sdk import capture_message, start_transaction from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.redis import RedisIntegration +from tests.conftest import ApproxDict from fakeredis.aioredis import FakeRedis @@ -64,18 +65,20 @@ async def test_async_redis_pipeline( 
diff --git a/tests/integrations/redis/asyncio/test_redis_asyncio.py b/tests/integrations/redis/asyncio/test_redis_asyncio.py
index 7233b8f908..4f024a2824 100644
--- a/tests/integrations/redis/asyncio/test_redis_asyncio.py
+++ b/tests/integrations/redis/asyncio/test_redis_asyncio.py
@@ -3,6 +3,7 @@
 from sentry_sdk import capture_message, start_transaction
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations.redis import RedisIntegration
+from tests.conftest import ApproxDict
 
 from fakeredis.aioredis import FakeRedis
 
@@ -64,18 +65,20 @@ async def test_async_redis_pipeline(
     (span,) = event["spans"]
     assert span["op"] == "db.redis"
     assert span["description"] == "redis.pipeline.execute"
-    assert span["data"] == {
-        "redis.commands": {
-            "count": 3,
-            "first_ten": expected_first_ten,
-        },
-        SPANDATA.DB_SYSTEM: "redis",
-        SPANDATA.DB_NAME: "0",
-        SPANDATA.SERVER_ADDRESS: connection.connection_pool.connection_kwargs.get(
-            "host"
-        ),
-        SPANDATA.SERVER_PORT: 6379,
-    }
+    assert span["data"] == ApproxDict(
+        {
+            "redis.commands": {
+                "count": 3,
+                "first_ten": expected_first_ten,
+            },
+            SPANDATA.DB_SYSTEM: "redis",
+            SPANDATA.DB_NAME: "0",
+            SPANDATA.SERVER_ADDRESS: connection.connection_pool.connection_kwargs.get(
+                "host"
+            ),
+            SPANDATA.SERVER_PORT: 6379,
+        }
+    )
     assert span["tags"] == {
         "redis.transaction": is_transaction,
         "redis.is_cluster": False,
diff --git a/tests/integrations/redis/cluster/test_redis_cluster.py b/tests/integrations/redis/cluster/test_redis_cluster.py
index 1e1e59e254..a16d66588c 100644
--- a/tests/integrations/redis/cluster/test_redis_cluster.py
+++ b/tests/integrations/redis/cluster/test_redis_cluster.py
@@ -3,6 +3,7 @@
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.api import start_transaction
 from sentry_sdk.integrations.redis import RedisIntegration
+from tests.conftest import ApproxDict
 
 import redis
 
@@ -82,12 +83,14 @@ def test_rediscluster_basic(sentry_init, capture_events, send_default_pii, descr
     span = spans[-1]
     assert span["op"] == "db.redis"
     assert span["description"] == description
-    assert span["data"] == {
-        SPANDATA.DB_SYSTEM: "redis",
-        # ClusterNode converts localhost to 127.0.0.1
-        SPANDATA.SERVER_ADDRESS: "127.0.0.1",
-        SPANDATA.SERVER_PORT: 6379,
-    }
+    assert span["data"] == ApproxDict(
+        {
+            SPANDATA.DB_SYSTEM: "redis",
+            # ClusterNode converts localhost to 127.0.0.1
+            SPANDATA.SERVER_ADDRESS: "127.0.0.1",
+            SPANDATA.SERVER_PORT: 6379,
+        }
+    )
     assert span["tags"] == {
         "db.operation": "SET",
         "redis.command": "SET",
@@ -125,16 +128,18 @@ def test_rediscluster_pipeline(
     (span,) = event["spans"]
     assert span["op"] == "db.redis"
     assert span["description"] == "redis.pipeline.execute"
-    assert span["data"] == {
-        "redis.commands": {
-            "count": 3,
-            "first_ten": expected_first_ten,
-        },
-        SPANDATA.DB_SYSTEM: "redis",
-        # ClusterNode converts localhost to 127.0.0.1
-        SPANDATA.SERVER_ADDRESS: "127.0.0.1",
-        SPANDATA.SERVER_PORT: 6379,
-    }
+    assert span["data"] == ApproxDict(
+        {
+            "redis.commands": {
+                "count": 3,
+                "first_ten": expected_first_ten,
+            },
+            SPANDATA.DB_SYSTEM: "redis",
+            # ClusterNode converts localhost to 127.0.0.1
+            SPANDATA.SERVER_ADDRESS: "127.0.0.1",
+            SPANDATA.SERVER_PORT: 6379,
+        }
+    )
     assert span["tags"] == {
         "redis.transaction": False,  # For Cluster, this is always False
         "redis.is_cluster": True,
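[Editor's note, not part of the patch] From here on, most assertions switch from exact dict equality to `ApproxDict`, imported from `tests/conftest.py`. As the tests use it, equality only requires the expected keys to be present with the expected values, so integrations may attach extra span or breadcrumb data without breaking these tests. A minimal sketch of such a helper:

```python
class ApproxDict(dict):
    def __eq__(self, other):
        # Equal if every expected key/value pair is present in `other`;
        # extra keys in `other` are deliberately ignored.
        return all(
            key in other and other[key] == value for key, value in self.items()
        )

    def __ne__(self, other):
        return not self.__eq__(other)
```

Because `ApproxDict` subclasses `dict`, Python prefers the subclass's `__eq__` even when the plain dict sits on the left of `==`, which is why `span["data"] == ApproxDict({...})` works. Note how `test_omit_url_data_if_parsing_fails` below compensates for the ignored extras with explicit `not in` assertions.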
"GET", - "redis.is_cluster": True, - }, + "data": ApproxDict( + { + "db.operation": "GET", + "redis.key": "foobar", + "redis.command": "GET", + "redis.is_cluster": True, + } + ), "timestamp": crumb["timestamp"], "type": "redis", } @@ -82,12 +85,14 @@ async def test_async_basic(sentry_init, capture_events, send_default_pii, descri (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == description - assert span["data"] == { - SPANDATA.DB_SYSTEM: "redis", - # ClusterNode converts localhost to 127.0.0.1 - SPANDATA.SERVER_ADDRESS: "127.0.0.1", - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + SPANDATA.DB_SYSTEM: "redis", + # ClusterNode converts localhost to 127.0.0.1 + SPANDATA.SERVER_ADDRESS: "127.0.0.1", + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "redis.is_cluster": True, "db.operation": "SET", @@ -126,16 +131,18 @@ async def test_async_redis_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 3, - "first_ten": expected_first_ten, - }, - SPANDATA.DB_SYSTEM: "redis", - # ClusterNode converts localhost to 127.0.0.1 - SPANDATA.SERVER_ADDRESS: "127.0.0.1", - SPANDATA.SERVER_PORT: 6379, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 3, + "first_ten": expected_first_ten, + }, + SPANDATA.DB_SYSTEM: "redis", + # ClusterNode converts localhost to 127.0.0.1 + SPANDATA.SERVER_ADDRESS: "127.0.0.1", + SPANDATA.SERVER_PORT: 6379, + } + ) assert span["tags"] == { "redis.transaction": False, "redis.is_cluster": True, diff --git a/tests/integrations/rediscluster/test_rediscluster.py b/tests/integrations/rediscluster/test_rediscluster.py index 14d831a647..88f987758b 100644 --- a/tests/integrations/rediscluster/test_rediscluster.py +++ b/tests/integrations/rediscluster/test_rediscluster.py @@ -4,6 +4,7 @@ from sentry_sdk.api import start_transaction from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.redis import RedisIntegration +from tests.conftest import ApproxDict try: from unittest import mock @@ -56,12 +57,14 @@ def test_rediscluster_basic(rediscluster_cls, sentry_init, capture_events): assert crumb == { "category": "redis", "message": "GET 'foobar'", - "data": { - "db.operation": "GET", - "redis.key": "foobar", - "redis.command": "GET", - "redis.is_cluster": True, - }, + "data": ApproxDict( + { + "db.operation": "GET", + "redis.key": "foobar", + "redis.command": "GET", + "redis.is_cluster": True, + } + ), "timestamp": crumb["timestamp"], "type": "redis", } @@ -96,16 +99,18 @@ def test_rediscluster_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 3, - "first_ten": expected_first_ten, - }, - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "1", - SPANDATA.SERVER_ADDRESS: "localhost", - SPANDATA.SERVER_PORT: 63791, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 3, + "first_ten": expected_first_ten, + }, + SPANDATA.DB_SYSTEM: "redis", + SPANDATA.DB_NAME: "1", + SPANDATA.SERVER_ADDRESS: "localhost", + SPANDATA.SERVER_PORT: 63791, + } + ) assert span["tags"] == { "redis.transaction": False, # For Cluster, this is always False "redis.is_cluster": True, @@ -127,12 +132,14 @@ def test_db_connection_attributes_client(sentry_init, capture_events, redisclust (event,) = events (span,) = event["spans"] - 
assert span["data"] == { - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "1", - SPANDATA.SERVER_ADDRESS: "localhost", - SPANDATA.SERVER_PORT: 63791, - } + assert span["data"] == ApproxDict( + { + SPANDATA.DB_SYSTEM: "redis", + SPANDATA.DB_NAME: "1", + SPANDATA.SERVER_ADDRESS: "localhost", + SPANDATA.SERVER_PORT: 63791, + } + ) @pytest.mark.parametrize("rediscluster_cls", rediscluster_classes) @@ -155,13 +162,15 @@ def test_db_connection_attributes_pipeline( (span,) = event["spans"] assert span["op"] == "db.redis" assert span["description"] == "redis.pipeline.execute" - assert span["data"] == { - "redis.commands": { - "count": 1, - "first_ten": ["GET 'foo'"], - }, - SPANDATA.DB_SYSTEM: "redis", - SPANDATA.DB_NAME: "1", - SPANDATA.SERVER_ADDRESS: "localhost", - SPANDATA.SERVER_PORT: 63791, - } + assert span["data"] == ApproxDict( + { + "redis.commands": { + "count": 1, + "first_ten": ["GET 'foo'"], + }, + SPANDATA.DB_SYSTEM: "redis", + SPANDATA.DB_NAME: "1", + SPANDATA.SERVER_ADDRESS: "localhost", + SPANDATA.SERVER_PORT: 63791, + } + ) diff --git a/tests/integrations/requests/test_requests.py b/tests/integrations/requests/test_requests.py index ed5b273712..1f4dd412d7 100644 --- a/tests/integrations/requests/test_requests.py +++ b/tests/integrations/requests/test_requests.py @@ -6,6 +6,7 @@ from sentry_sdk import capture_message from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.stdlib import StdlibIntegration +from tests.conftest import ApproxDict try: from unittest import mock # python 3.3 and above @@ -28,14 +29,16 @@ def test_crumb_capture(sentry_init, capture_events): (crumb,) = event["breadcrumbs"]["values"] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - SPANDATA.HTTP_STATUS_CODE: response.status_code, - "reason": response.reason, - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + SPANDATA.HTTP_STATUS_CODE: response.status_code, + "reason": response.reason, + } + ) @pytest.mark.tests_internal_exceptions @@ -56,9 +59,15 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events): capture_message("Testing!") (event,) = events - assert event["breadcrumbs"]["values"][0]["data"] == { - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: response.status_code, - "reason": response.reason, - # no url related data - } + assert event["breadcrumbs"]["values"][0]["data"] == ApproxDict( + { + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: response.status_code, + "reason": response.reason, + # no url related data + } + ) + + assert "url" not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_FRAGMENT not in event["breadcrumbs"]["values"][0]["data"] + assert SPANDATA.HTTP_QUERY not in event["breadcrumbs"]["values"][0]["data"] diff --git a/tests/integrations/socket/test_socket.py b/tests/integrations/socket/test_socket.py index 914ba0bf84..4f93c1f2a5 100644 --- a/tests/integrations/socket/test_socket.py +++ b/tests/integrations/socket/test_socket.py @@ -2,6 +2,7 @@ from sentry_sdk import start_transaction from sentry_sdk.integrations.socket import SocketIntegration +from tests.conftest import ApproxDict def test_getaddrinfo_trace(sentry_init, capture_events): @@ -16,10 +17,12 @@ def test_getaddrinfo_trace(sentry_init, capture_events): assert span["op"] == "socket.dns" assert 
span["description"] == "example.com:443" - assert span["data"] == { - "host": "example.com", - "port": 443, - } + assert span["data"] == ApproxDict( + { + "host": "example.com", + "port": 443, + } + ) def test_create_connection_trace(sentry_init, capture_events): @@ -37,15 +40,19 @@ def test_create_connection_trace(sentry_init, capture_events): assert connect_span["op"] == "socket.connection" assert connect_span["description"] == "example.com:443" - assert connect_span["data"] == { - "address": ["example.com", 443], - "timeout": timeout, - "source_address": None, - } + assert connect_span["data"] == ApproxDict( + { + "address": ["example.com", 443], + "timeout": timeout, + "source_address": None, + } + ) assert dns_span["op"] == "socket.dns" assert dns_span["description"] == "example.com:443" - assert dns_span["data"] == { - "host": "example.com", - "port": 443, - } + assert dns_span["data"] == ApproxDict( + { + "host": "example.com", + "port": 443, + } + ) diff --git a/tests/integrations/stdlib/test_httplib.py b/tests/integrations/stdlib/test_httplib.py index d50bf42e21..6055b86ab8 100644 --- a/tests/integrations/stdlib/test_httplib.py +++ b/tests/integrations/stdlib/test_httplib.py @@ -27,7 +27,7 @@ from sentry_sdk.tracing import Transaction from sentry_sdk.integrations.stdlib import StdlibIntegration -from tests.conftest import create_mock_http_server +from tests.conftest import ApproxDict, create_mock_http_server PORT = create_mock_http_server() @@ -46,14 +46,16 @@ def test_crumb_capture(sentry_init, capture_events): assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + } + ) def test_crumb_capture_hint(sentry_init, capture_events): @@ -73,15 +75,17 @@ def before_breadcrumb(crumb, hint): (crumb,) = event["breadcrumbs"]["values"] assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": url, - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - "extra": "foo", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - } + assert crumb["data"] == ApproxDict( + { + "url": url, + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + "extra": "foo", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + } + ) def test_empty_realurl(sentry_init): @@ -131,14 +135,16 @@ def test_httplib_misuse(sentry_init, capture_events, request): assert crumb["type"] == "http" assert crumb["category"] == "httplib" - assert crumb["data"] == { - "url": "http://localhost:{}/200".format(PORT), - SPANDATA.HTTP_METHOD: "GET", - SPANDATA.HTTP_STATUS_CODE: 200, - "reason": "OK", - SPANDATA.HTTP_FRAGMENT: "", - SPANDATA.HTTP_QUERY: "", - } + assert crumb["data"] == ApproxDict( + { + "url": "http://localhost:{}/200".format(PORT), + SPANDATA.HTTP_METHOD: "GET", + SPANDATA.HTTP_STATUS_CODE: 200, + "reason": "OK", + SPANDATA.HTTP_FRAGMENT: "", + SPANDATA.HTTP_QUERY: "", + } + ) def test_outgoing_trace_headers(sentry_init, monkeypatch): diff --git a/tests/integrations/stdlib/test_subprocess.py b/tests/integrations/stdlib/test_subprocess.py index 31da043ac3..d61be35fd2 100644 --- 
diff --git a/tests/integrations/stdlib/test_subprocess.py b/tests/integrations/stdlib/test_subprocess.py
index 31da043ac3..d61be35fd2 100644
--- a/tests/integrations/stdlib/test_subprocess.py
+++ b/tests/integrations/stdlib/test_subprocess.py
@@ -8,6 +8,7 @@
 from sentry_sdk import capture_message, start_transaction
 from sentry_sdk._compat import PY2
 from sentry_sdk.integrations.stdlib import StdlibIntegration
+from tests.conftest import ApproxDict
 
 
 if PY2:
@@ -125,7 +126,7 @@ def test_subprocess_basic(
 
     assert message_event["message"] == "hi"
 
-    data = {"subprocess.cwd": os.getcwd()} if with_cwd else {}
+    data = ApproxDict({"subprocess.cwd": os.getcwd()} if with_cwd else {})
 
     (crumb,) = message_event["breadcrumbs"]["values"]
     assert crumb == {
diff --git a/tests/integrations/strawberry/test_strawberry_py3.py b/tests/integrations/strawberry/test_strawberry_py3.py
index b357779461..4911a1b5c3 100644
--- a/tests/integrations/strawberry/test_strawberry_py3.py
+++ b/tests/integrations/strawberry/test_strawberry_py3.py
@@ -25,6 +25,7 @@
     SentryAsyncExtension,
     SentrySyncExtension,
 )
+from tests.conftest import ApproxDict
 
 
 parameterize_strawberry_test = pytest.mark.parametrize(
@@ -351,12 +352,14 @@ def test_capture_transaction_on_error(
     resolve_span = resolve_spans[0]
     assert resolve_span["parent_span_id"] == query_span["span_id"]
     assert resolve_span["description"] == "resolving Query.error"
-    assert resolve_span["data"] == {
-        "graphql.field_name": "error",
-        "graphql.parent_type": "Query",
-        "graphql.field_path": "Query.error",
-        "graphql.path": "error",
-    }
+    assert resolve_span["data"] == ApproxDict(
+        {
+            "graphql.field_name": "error",
+            "graphql.parent_type": "Query",
+            "graphql.field_path": "Query.error",
+            "graphql.path": "error",
+        }
+    )
 
 
 @parameterize_strawberry_test
@@ -429,12 +432,14 @@ def test_capture_transaction_on_success(
     resolve_span = resolve_spans[0]
     assert resolve_span["parent_span_id"] == query_span["span_id"]
     assert resolve_span["description"] == "resolving Query.hello"
-    assert resolve_span["data"] == {
-        "graphql.field_name": "hello",
-        "graphql.parent_type": "Query",
-        "graphql.field_path": "Query.hello",
-        "graphql.path": "hello",
-    }
+    assert resolve_span["data"] == ApproxDict(
+        {
+            "graphql.field_name": "hello",
+            "graphql.parent_type": "Query",
+            "graphql.field_path": "Query.hello",
+            "graphql.path": "hello",
+        }
+    )
 
 
 @parameterize_strawberry_test
@@ -507,12 +512,14 @@ def test_transaction_no_operation_name(
     resolve_span = resolve_spans[0]
     assert resolve_span["parent_span_id"] == query_span["span_id"]
     assert resolve_span["description"] == "resolving Query.hello"
-    assert resolve_span["data"] == {
-        "graphql.field_name": "hello",
-        "graphql.parent_type": "Query",
-        "graphql.field_path": "Query.hello",
-        "graphql.path": "hello",
-    }
+    assert resolve_span["data"] == ApproxDict(
+        {
+            "graphql.field_name": "hello",
+            "graphql.parent_type": "Query",
+            "graphql.field_path": "Query.hello",
+            "graphql.path": "hello",
+        }
+    )
 
 
 @parameterize_strawberry_test
@@ -585,9 +592,11 @@ def test_transaction_mutation(
     resolve_span = resolve_spans[0]
     assert resolve_span["parent_span_id"] == query_span["span_id"]
     assert resolve_span["description"] == "resolving Mutation.change"
-    assert resolve_span["data"] == {
-        "graphql.field_name": "change",
-        "graphql.parent_type": "Mutation",
-        "graphql.field_path": "Mutation.change",
-        "graphql.path": "change",
-    }
+    assert resolve_span["data"] == ApproxDict(
+        {
+            "graphql.field_name": "change",
+            "graphql.parent_type": "Mutation",
+            "graphql.field_path": "Mutation.change",
+            "graphql.path": "change",
+        }
+    )
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index d3cfd659d1..1d4a49fcb2 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -811,6 +811,7 @@ def test_tag_normalization(
     metrics.distribution("a", 1.0, tags={"foo-bar": "%$foo"}, timestamp=ts)
     metrics.distribution("b", 1.0, tags={"foo$$$bar": "blah{}"}, timestamp=ts)
     metrics.distribution("c", 1.0, tags={u"foö-bar": u"snöwmän"}, timestamp=ts)
+    metrics.distribution("d", 1.0, tags={"route": "GET /foo"}, timestamp=ts)
     # fmt: on
 
     Hub.current.flush()
@@ -820,9 +821,9 @@ def test_tag_normalization(
     assert envelope.items[0].headers["type"] == "statsd"
     m = parse_metrics(envelope.items[0].payload.get_bytes())
 
-    assert len(m) == 3
+    assert len(m) == 4
     assert m[0][4] == {
-        "foo-bar": "_$foo",
+        "foo-bar": "$foo",
         "release": "fun-release@1.0.0",
        "environment": "not-fun-env",
     }
@@ -839,6 +840,11 @@ def test_tag_normalization(
         "release": "fun-release@1.0.0",
         "environment": "not-fun-env",
     }
+    assert m[3][4] == {
+        "release": "fun-release@1.0.0",
+        "environment": "not-fun-env",
+        "route": "GET /foo",
+    }
     # fmt: on
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 94659ff02f..495dd3f300 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -16,13 +16,11 @@
     extract_frame,
     extract_stack,
     frame_id,
-    get_current_thread_id,
     get_frame_name,
     setup_profiler,
 )
 from sentry_sdk.tracing import Transaction
 from sentry_sdk._lru_cache import LRUCache
-from sentry_sdk._queue import Queue
 
 try:
     from unittest import mock  # python 3.3 and above
@@ -556,74 +554,6 @@ def test_extract_stack_with_cache(frame, depth):
         assert frame1 is frame2, i
 
 
-@requires_python_version(3, 3)
-def test_get_current_thread_id_explicit_thread():
-    results = Queue(maxsize=1)
-
-    def target1():
-        pass
-
-    def target2():
-        results.put(get_current_thread_id(thread1))
-
-    thread1 = threading.Thread(target=target1)
-    thread1.start()
-
-    thread2 = threading.Thread(target=target2)
-    thread2.start()
-
-    thread2.join()
-    thread1.join()
-
-    assert thread1.ident == results.get(timeout=1)
-
-
-@requires_python_version(3, 3)
-@requires_gevent
-def test_get_current_thread_id_gevent_in_thread():
-    results = Queue(maxsize=1)
-
-    def target():
-        job = gevent.spawn(get_current_thread_id)
-        job.join()
-        results.put(job.value)
-
-    thread = threading.Thread(target=target)
-    thread.start()
-    thread.join()
-    assert thread.ident == results.get(timeout=1)
-
-
-@requires_python_version(3, 3)
-def test_get_current_thread_id_running_thread():
-    results = Queue(maxsize=1)
-
-    def target():
-        results.put(get_current_thread_id())
-
-    thread = threading.Thread(target=target)
-    thread.start()
-    thread.join()
-    assert thread.ident == results.get(timeout=1)
-
-
-@requires_python_version(3, 3)
-def test_get_current_thread_id_main_thread():
-    results = Queue(maxsize=1)
-
-    def target():
-        # mock that somehow the current thread doesn't exist
-        with mock.patch("threading.current_thread", side_effect=[None]):
-            results.put(get_current_thread_id())
-
-    thread_id = threading.main_thread().ident if sys.version_info >= (3, 4) else None
-
-    thread = threading.Thread(target=target)
-    thread.start()
-    thread.join()
-    assert thread_id == results.get(timeout=1)
-
-
 def get_scheduler_threads(scheduler):
     return [thread for thread in threading.enumerate() if thread.name == scheduler.name]
diff --git a/tests/test_scrubber.py b/tests/test_scrubber.py
index 4b2dfff450..2c4bd3aa90 100644
--- a/tests/test_scrubber.py
+++ b/tests/test_scrubber.py
@@ -4,6 +4,7 @@
 from sentry_sdk import capture_exception, capture_event, start_transaction, start_span
 from sentry_sdk.utils import event_from_exception
 from sentry_sdk.scrubber import EventScrubber
+from tests.conftest import ApproxDict
 
 logger = logging.getLogger(__name__)
 
@@ -121,7 +122,9 @@ def test_span_data_scrubbing(sentry_init, capture_events):
             span.set_data("datafoo", "databar")
 
     (event,) = events
-    assert event["spans"][0]["data"] == {"password": "[Filtered]", "datafoo": "databar"}
+    assert event["spans"][0]["data"] == ApproxDict(
+        {"password": "[Filtered]", "datafoo": "databar"}
+    )
     assert event["_meta"]["spans"] == {
         "0": {"data": {"password": {"": {"rem": [["!config", "s"]]}}}}
     }
@@ -169,3 +172,18 @@ def test_scrubbing_doesnt_affect_local_vars(sentry_init, capture_events):
     (frame,) = frames
     assert frame["vars"]["password"] == "[Filtered]"
     assert password == "cat123"
+
+
+def test_recursive_event_scrubber(sentry_init, capture_events):
+    sentry_init(event_scrubber=EventScrubber(recursive=True))
+    events = capture_events()
+    complex_structure = {
+        "deep": {
+            "deeper": [{"deepest": {"password": "my_darkest_secret"}}],
+        },
+    }
+
+    capture_event({"extra": complex_structure})
+
+    (event,) = events
+    assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'"
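[Editor's note, not part of the patch] The new `test_recursive_event_scrubber` covers the `recursive=True` flag on `EventScrubber`, which scrubs denylisted keys at any nesting depth instead of only at the known top-level event fields. A minimal usage sketch, mirroring how the test itself configures it (the DSN is a placeholder):

```python
import sentry_sdk
from sentry_sdk.scrubber import EventScrubber

sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    # Walk nested dicts/lists and scrub matches like "password" everywhere.
    event_scrubber=EventScrubber(recursive=True),
)
```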
diff --git a/tests/test_transport.py b/tests/test_transport.py
index 71c47e04fc..c1f70b0108 100644
--- a/tests/test_transport.py
+++ b/tests/test_transport.py
@@ -3,18 +3,17 @@
 import pickle
 import gzip
 import io
-
+import socket
+from collections import namedtuple
 from datetime import datetime, timedelta
 
 import pytest
-from collections import namedtuple
-from werkzeug.wrappers import Request, Response
-
 from pytest_localserver.http import WSGIServer
+from werkzeug.wrappers import Request, Response
 
 from sentry_sdk import Hub, Client, add_breadcrumb, capture_message, Scope
 from sentry_sdk._compat import datetime_utcnow
-from sentry_sdk.transport import _parse_rate_limits
+from sentry_sdk.transport import KEEP_ALIVE_SOCKET_OPTIONS, _parse_rate_limits
 from sentry_sdk.envelope import Envelope, parse_json
 from sentry_sdk.integrations.logging import LoggingIntegration
 
@@ -155,6 +154,79 @@ def test_transport_num_pools(make_client, num_pools, expected_num_pools):
     assert options["num_pools"] == expected_num_pools
 
 
+def test_socket_options(make_client):
+    socket_options = [
+        (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
+        (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10),
+        (socket.SOL_TCP, socket.TCP_KEEPCNT, 6),
+    ]
+
+    client = make_client(socket_options=socket_options)
+
+    options = client.transport._get_pool_options([])
+    assert options["socket_options"] == socket_options
+
+
+def test_keep_alive_true(make_client):
+    client = make_client(keep_alive=True)
+
+    options = client.transport._get_pool_options([])
+    assert options["socket_options"] == KEEP_ALIVE_SOCKET_OPTIONS
+
+
+def test_keep_alive_off_by_default(make_client):
+    client = make_client()
+    options = client.transport._get_pool_options([])
+    assert "socket_options" not in options
+
+
+def test_socket_options_override_keep_alive(make_client):
+    socket_options = [
+        (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
+        (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10),
+        (socket.SOL_TCP, socket.TCP_KEEPCNT, 6),
+    ]
+
+    client = make_client(socket_options=socket_options, keep_alive=False)
+
+    options = client.transport._get_pool_options([])
+    assert options["socket_options"] == socket_options
+
+
+def test_socket_options_merge_with_keep_alive(make_client):
+    socket_options = [
+        (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 42),
+        (socket.SOL_TCP, socket.TCP_KEEPINTVL, 42),
+    ]
+
+    client = make_client(socket_options=socket_options, keep_alive=True)
+
+    options = client.transport._get_pool_options([])
+    try:
+        assert options["socket_options"] == [
+            (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 42),
+            (socket.SOL_TCP, socket.TCP_KEEPINTVL, 42),
+            (socket.SOL_TCP, socket.TCP_KEEPIDLE, 45),
+            (socket.SOL_TCP, socket.TCP_KEEPCNT, 6),
+        ]
+    except AttributeError:
+        assert options["socket_options"] == [
+            (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 42),
+            (socket.SOL_TCP, socket.TCP_KEEPINTVL, 42),
+            (socket.SOL_TCP, socket.TCP_KEEPCNT, 6),
+        ]
+
+
+def test_socket_options_override_defaults(make_client):
+    # If socket_options are set to [], this doesn't mean the user doesn't want
+    # any custom socket_options, but rather that they want to disable the urllib3
+    # socket option defaults, so we need to set this and not ignore it.
+    client = make_client(socket_options=[])
+
+    options = client.transport._get_pool_options([])
+    assert options["socket_options"] == []
+
+
 def test_transport_infinite_loop(capturing_server, request, make_client):
     client = make_client(
         debug=True,
@@ -219,7 +291,7 @@ def test_parse_rate_limits(input, expected):
     assert dict(_parse_rate_limits(input, now=NOW)) == expected
 
 
-def test_simple_rate_limits(capturing_server, capsys, caplog, make_client):
+def test_simple_rate_limits(capturing_server, make_client):
     client = make_client()
     capturing_server.respond_with(code=429, headers={"Retry-After": "4"})
 
@@ -241,7 +313,7 @@ def test_simple_rate_limits(capturing_server, capsys, caplog, make_client):
 
 @pytest.mark.parametrize("response_code", [200, 429])
 def test_data_category_limits(
-    capturing_server, capsys, caplog, response_code, make_client, monkeypatch
+    capturing_server, response_code, make_client, monkeypatch
 ):
     client = make_client(send_client_reports=False)
 
@@ -288,7 +360,7 @@ def record_lost_event(reason, data_category=None, item=None):
 
 @pytest.mark.parametrize("response_code", [200, 429])
 def test_data_category_limits_reporting(
-    capturing_server, capsys, caplog, response_code, make_client, monkeypatch
+    capturing_server, response_code, make_client, monkeypatch
 ):
     client = make_client(send_client_reports=True)
 
@@ -371,7 +443,7 @@ def intercepting_fetch(*args, **kwargs):
 
 @pytest.mark.parametrize("response_code", [200, 429])
 def test_complex_limits_without_data_category(
-    capturing_server, capsys, caplog, response_code, make_client
+    capturing_server, response_code, make_client
 ):
     client = make_client()
     capturing_server.respond_with(
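[Editor's note, not part of the patch] The transport tests above fix the semantics of the `keep_alive` and `socket_options` client options: `keep_alive=True` applies `KEEP_ALIVE_SOCKET_OPTIONS`, explicit `socket_options` take precedence for anything they already set (and an empty list is honored rather than ignored), and options the platform lacks (`TCP_KEEPIDLE` on e.g. macOS) are skipped when merging. A usage sketch showing the two entry points as alternatives, with a placeholder DSN:

```python
import socket

import sentry_sdk

# Simplest form: opt in to the SDK's TCP keep-alive defaults.
sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    keep_alive=True,
)

# Full control: pass urllib3-style socket options directly; these win
# over keep_alive for any option they already set.
sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    socket_options=[
        (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
        (socket.SOL_TCP, socket.TCP_KEEPINTVL, 10),
        (socket.SOL_TCP, socket.TCP_KEEPCNT, 6),
    ],
)
```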
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 147064b541..4b8e9087cc 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,12 +1,15 @@
 import pytest
 import re
 import sys
+import threading
 
 from datetime import timedelta
 
 from sentry_sdk._compat import duration_in_milliseconds
+from sentry_sdk._queue import Queue
 from sentry_sdk.utils import (
     Components,
     Dsn,
+    get_current_thread_meta,
     get_default_release,
     get_error_message,
     get_git_revision,
@@ -29,6 +32,11 @@
 except ImportError:
     import mock  # python < 3.3
 
+try:
+    import gevent
+except ImportError:
+    gevent = None
+
 try:
     # Python 3
     FileNotFoundError
@@ -607,3 +615,138 @@ def test_default_release_empty_string():
 )
 def test_duration_in_milliseconds(timedelta, expected_milliseconds):
     assert duration_in_milliseconds(timedelta) == expected_milliseconds
+
+
+def test_get_current_thread_meta_explicit_thread():
+    results = Queue(maxsize=1)
+
+    def target1():
+        pass
+
+    def target2():
+        results.put(get_current_thread_meta(thread1))
+
+    thread1 = threading.Thread(target=target1)
+    thread1.start()
+
+    thread2 = threading.Thread(target=target2)
+    thread2.start()
+
+    thread2.join()
+    thread1.join()
+
+    assert (thread1.ident, thread1.name) == results.get(timeout=1)
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 4), reason="threading.main_thread() Not available"
+)
+def test_get_current_thread_meta_bad_explicit_thread():
+    thread = "fake thread"
+
+    main_thread = threading.main_thread()
+
+    assert (main_thread.ident, main_thread.name) == get_current_thread_meta(thread)
+
+
+@pytest.mark.skipif(gevent is None, reason="gevent not enabled")
+def test_get_current_thread_meta_gevent_in_thread():
+    results = Queue(maxsize=1)
+
+    def target():
+        with mock.patch("sentry_sdk.utils.is_gevent", side_effect=[True]):
+            job = gevent.spawn(get_current_thread_meta)
+            job.join()
+            results.put(job.value)
+
+    thread = threading.Thread(target=target)
+    thread.start()
+    thread.join()
+    assert (thread.ident, None) == results.get(timeout=1)
+
+
+@pytest.mark.skipif(gevent is None, reason="gevent not enabled")
+def test_get_current_thread_meta_gevent_in_thread_failed_to_get_hub():
+    results = Queue(maxsize=1)
+
+    def target():
+        with mock.patch("sentry_sdk.utils.is_gevent", side_effect=[True]):
+            with mock.patch(
+                "sentry_sdk.utils.get_gevent_hub", side_effect=["fake hub"]
+            ):
+                job = gevent.spawn(get_current_thread_meta)
+                job.join()
+                results.put(job.value)
+
+    thread = threading.Thread(target=target)
+    thread.start()
+    thread.join()
+    assert (thread.ident, thread.name) == results.get(timeout=1)
+
+
+def test_get_current_thread_meta_running_thread():
+    results = Queue(maxsize=1)
+
+    def target():
+        results.put(get_current_thread_meta())
+
+    thread = threading.Thread(target=target)
+    thread.start()
+    thread.join()
+    assert (thread.ident, thread.name) == results.get(timeout=1)
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 4), reason="threading.main_thread() Not available"
+)
+def test_get_current_thread_meta_bad_running_thread():
+    results = Queue(maxsize=1)
+
+    def target():
+        with mock.patch("threading.current_thread", side_effect=["fake thread"]):
+            results.put(get_current_thread_meta())
+
+    thread = threading.Thread(target=target)
+    thread.start()
+    thread.join()
+
+    main_thread = threading.main_thread()
+    assert (main_thread.ident, main_thread.name) == results.get(timeout=1)
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 4), reason="threading.main_thread() Not available"
+)
+def test_get_current_thread_meta_main_thread():
+    results = Queue(maxsize=1)
+
+    def target():
+        # mock that somehow the current thread doesn't exist
+        with mock.patch("threading.current_thread", side_effect=[None]):
+            results.put(get_current_thread_meta())
+
+    main_thread = threading.main_thread()
+
+    thread = threading.Thread(target=target)
+    thread.start()
+    thread.join()
+    assert (main_thread.ident, main_thread.name) == results.get(timeout=1)
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 4), reason="threading.main_thread() Not available"
+)
+def test_get_current_thread_meta_failed_to_get_main_thread():
+    results = Queue(maxsize=1)
+
+    def target():
+        with mock.patch("threading.current_thread", side_effect=["fake thread"]):
+            with mock.patch("threading.current_thread", side_effect=["fake thread"]):
+                results.put(get_current_thread_meta())
+
+    main_thread = threading.main_thread()
+
+    thread = threading.Thread(target=target)
+    thread.start()
+    thread.join()
+    assert (main_thread.ident, main_thread.name) == results.get(timeout=1)
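[Editor's note, not part of the patch] The `get_current_thread_meta` tests encode a fallback chain for resolving a `(thread_id, thread_name)` pair. A sketch of the order the tests imply, illustrative rather than the actual implementation: an explicitly passed thread wins; under gevent, the gevent hub's thread ident is used with `name=None`; otherwise `threading.current_thread()`, then `threading.main_thread()` as a last resort.

```python
import threading


def current_thread_meta_sketch(thread=None):
    # 1. An explicitly passed thread, if it quacks like one.
    if thread is not None:
        try:
            if thread.ident is not None:
                return thread.ident, thread.name
        except AttributeError:
            pass
    # 2. (gevent only) the gevent hub's thread ident, with no name.
    # 3. The current thread.
    try:
        current = threading.current_thread()
        if current.ident is not None:
            return current.ident, current.name
    except AttributeError:
        pass
    # 4. The main thread as a last resort.
    try:
        main = threading.main_thread()
        return main.ident, main.name
    except AttributeError:
        return None, None
```

One quirk worth noting: the last test nests two patches of `threading.current_thread`, so only the innermost mock is consulted. The current-thread lookup fails once and the main-thread fallback still resolves the real main thread, which is exactly what its final assertion expects.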
diff --git a/tox.ini b/tox.ini
index a23251f186..1e7ba06a00 100644
--- a/tox.ini
+++ b/tox.ini
@@ -146,6 +146,11 @@ envlist =
     {py3.5,py3.11,py3.12}-loguru-v{0.5}
     {py3.5,py3.11,py3.12}-loguru-latest
 
+    # OpenAI
+    {py3.9,py3.11,py3.12}-openai-v1
+    {py3.9,py3.11,py3.12}-openai-latest
+    {py3.9,py3.11,py3.12}-openai-notiktoken
+
     # OpenTelemetry (OTel)
     {py3.7,py3.9,py3.11,py3.12}-opentelemetry
 
@@ -439,6 +444,13 @@ deps =
     loguru-v0.5: loguru~=0.5.0
     loguru-latest: loguru
 
+    # OpenAI
+    openai-v1: openai~=1.0.0
+    openai-v1: tiktoken~=0.6.0
+    openai-latest: openai
+    openai-latest: tiktoken~=0.6.0
+    openai-notiktoken: openai
+
     # OpenTelemetry (OTel)
     opentelemetry: opentelemetry-distro
 
@@ -597,6 +609,7 @@ setenv =
     httpx: TESTPATH=tests/integrations/httpx
     huey: TESTPATH=tests/integrations/huey
     loguru: TESTPATH=tests/integrations/loguru
+    openai: TESTPATH=tests/integrations/openai
    opentelemetry: TESTPATH=tests/integrations/opentelemetry
     pure_eval: TESTPATH=tests/integrations/pure_eval
     pymongo: TESTPATH=tests/integrations/pymongo