From bb0243a0afb4b2ad6488cf6f20cd9306d534f45b Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Dec 2020 18:48:01 -0800 Subject: [PATCH] split tracing utils into separate module --- sentry_sdk/integrations/django/__init__.py | 2 +- sentry_sdk/integrations/sqlalchemy.py | 2 +- sentry_sdk/integrations/stdlib.py | 2 +- sentry_sdk/tracing.py | 175 ++------------------ sentry_sdk/tracing_utils.py | 180 +++++++++++++++++++++ tests/tracing/test_sampling.py | 7 +- 6 files changed, 196 insertions(+), 172 deletions(-) create mode 100644 sentry_sdk/tracing_utils.py diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index 008dc386bb..4705d2486e 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -9,7 +9,7 @@ from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.scope import add_global_event_processor from sentry_sdk.serializer import add_global_repr_processor -from sentry_sdk.tracing import record_sql_queries +from sentry_sdk.tracing_utils import record_sql_queries from sentry_sdk.utils import ( HAS_REAL_CONTEXTVARS, CONTEXTVARS_ERROR_MESSAGE, diff --git a/sentry_sdk/integrations/sqlalchemy.py b/sentry_sdk/integrations/sqlalchemy.py index 6c8e5eb88e..4b0207f5ec 100644 --- a/sentry_sdk/integrations/sqlalchemy.py +++ b/sentry_sdk/integrations/sqlalchemy.py @@ -3,7 +3,7 @@ from sentry_sdk._types import MYPY from sentry_sdk.hub import Hub from sentry_sdk.integrations import Integration, DidNotEnable -from sentry_sdk.tracing import record_sql_queries +from sentry_sdk.tracing_utils import record_sql_queries try: from sqlalchemy.engine import Engine # type: ignore diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index 56cece70ac..d645ae7333 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -6,7 +6,7 @@ from sentry_sdk.hub import Hub from sentry_sdk.integrations import Integration from sentry_sdk.scope import add_global_event_processor -from sentry_sdk.tracing import EnvironHeaders +from sentry_sdk.tracing_utils import EnvironHeaders from sentry_sdk.utils import capture_internal_exceptions, safe_repr from sentry_sdk._types import MYPY diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 73531894ef..18c3645cb0 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -1,27 +1,20 @@ -import re import uuid -import contextlib -import math import random import time from datetime import datetime, timedelta -from numbers import Real import sentry_sdk -from sentry_sdk.utils import ( - capture_internal_exceptions, - logger, - to_string, +from sentry_sdk.utils import logger +from sentry_sdk.tracing_utils import ( + SENTRY_TRACE_REGEX, + EnvironHeaders, + is_valid_sample_rate, + maybe_create_breadcrumbs_from_span, ) -from sentry_sdk._compat import PY2 from sentry_sdk._types import MYPY -if PY2: - from collections import Mapping -else: - from collections.abc import Mapping if MYPY: import typing @@ -35,45 +28,6 @@ from sentry_sdk._types import SamplingContext -_traceparent_header_format_re = re.compile( - "^[ \t]*" # whitespace - "([0-9a-f]{32})?" # trace_id - "-?([0-9a-f]{16})?" # span_id - "-?([01])?" # sampled - "[ \t]*$" # whitespace -) - - -class EnvironHeaders(Mapping): # type: ignore - def __init__( - self, - environ, # type: typing.Mapping[str, str] - prefix="HTTP_", # type: str - ): - # type: (...) -> None - self.environ = environ - self.prefix = prefix - - def __getitem__(self, key): - # type: (str) -> Optional[Any] - return self.environ[self.prefix + key.replace("-", "_").upper()] - - def __len__(self): - # type: () -> int - return sum(1 for _ in iter(self)) - - def __iter__(self): - # type: () -> Generator[str, None, None] - for k in self.environ: - if not isinstance(k, str): - continue - - k = k.replace("-", "_").upper() - if not k.startswith(self.prefix): - continue - - yield k[len(self.prefix) :] - class _SpanRecorder(object): """Limits the number of spans recorded in a transaction.""" @@ -325,7 +279,7 @@ def from_traceparent( if traceparent.startswith("00-") and traceparent.endswith("-00"): traceparent = traceparent[3:-3] - match = _traceparent_header_format_re.match(str(traceparent)) + match = SENTRY_TRACE_REGEX.match(str(traceparent)) if match is None: return None @@ -422,7 +376,7 @@ def finish(self, hub=None): except AttributeError: self.timestamp = datetime.utcnow() - _maybe_create_breadcrumbs_from_span(hub, self) + maybe_create_breadcrumbs_from_span(hub, self) return None def to_json(self): @@ -618,7 +572,7 @@ def _set_initial_sampling_decision(self, sampling_context): # Since this is coming from the user (or from a function provided by the # user), who knows what we might get. (The only valid values are # booleans or numbers between 0 and 1.) - if not _is_valid_sample_rate(sample_rate): + if not is_valid_sample_rate(sample_rate): logger.warning( "[Tracing] Discarding {transaction_description} because of invalid sample rate.".format( transaction_description=transaction_description, @@ -661,114 +615,3 @@ def _set_initial_sampling_decision(self, sampling_context): sample_rate=float(sample_rate), ) ) - - -def _is_valid_sample_rate(rate): - # type: (Any) -> bool - """ - Checks the given sample rate to make sure it is valid type and value (a - boolean or a number between 0 and 1, inclusive). - """ - - # both booleans and NaN are instances of Real, so a) checking for Real - # checks for the possibility of a boolean also, and b) we have to check - # separately for NaN - if not isinstance(rate, Real) or math.isnan(rate): - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( - rate=rate, type=type(rate) - ) - ) - return False - - # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False - rate = float(rate) - if rate < 0 or rate > 1: - logger.warning( - "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( - rate=rate - ) - ) - return False - - return True - - -def _format_sql(cursor, sql): - # type: (Any, str) -> Optional[str] - - real_sql = None - - # If we're using psycopg2, it could be that we're - # looking at a query that uses Composed objects. Use psycopg2's mogrify - # function to format the query. We lose per-parameter trimming but gain - # accuracy in formatting. - try: - if hasattr(cursor, "mogrify"): - real_sql = cursor.mogrify(sql) - if isinstance(real_sql, bytes): - real_sql = real_sql.decode(cursor.connection.encoding) - except Exception: - real_sql = None - - return real_sql or to_string(sql) - - -@contextlib.contextmanager -def record_sql_queries( - hub, # type: sentry_sdk.Hub - cursor, # type: Any - query, # type: Any - params_list, # type: Any - paramstyle, # type: Optional[str] - executemany, # type: bool -): - # type: (...) -> Generator[Span, None, None] - - # TODO: Bring back capturing of params by default - if hub.client and hub.client.options["_experiments"].get( - "record_sql_params", False - ): - if not params_list or params_list == [None]: - params_list = None - - if paramstyle == "pyformat": - paramstyle = "format" - else: - params_list = None - paramstyle = None - - query = _format_sql(cursor, query) - - data = {} - if params_list is not None: - data["db.params"] = params_list - if paramstyle is not None: - data["db.paramstyle"] = paramstyle - if executemany: - data["db.executemany"] = True - - with capture_internal_exceptions(): - hub.add_breadcrumb(message=query, category="query", data=data) - - with hub.start_span(op="db", description=query) as span: - for k, v in data.items(): - span.set_data(k, v) - yield span - - -def _maybe_create_breadcrumbs_from_span(hub, span): - # type: (sentry_sdk.Hub, Span) -> None - if span.op == "redis": - hub.add_breadcrumb( - message=span.description, type="redis", category="redis", data=span._tags - ) - elif span.op == "http": - hub.add_breadcrumb(type="http", category="httplib", data=span._data) - elif span.op == "subprocess": - hub.add_breadcrumb( - type="subprocess", - category="subprocess", - message=span.description, - data=span._data, - ) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py new file mode 100644 index 0000000000..aebb5c58d2 --- /dev/null +++ b/sentry_sdk/tracing_utils.py @@ -0,0 +1,180 @@ +import re +import contextlib +import math + +from numbers import Real + +import sentry_sdk + +from sentry_sdk.utils import ( + capture_internal_exceptions, + logger, + to_string, +) +from sentry_sdk._compat import PY2 +from sentry_sdk._types import MYPY + +if PY2: + from collections import Mapping +else: + from collections.abc import Mapping + +if MYPY: + import typing + + from typing import Generator + from typing import Optional + from typing import Any + + from sentry_sdk.tracing import Span + + +SENTRY_TRACE_REGEX = re.compile( + "^[ \t]*" # whitespace + "([0-9a-f]{32})?" # trace_id + "-?([0-9a-f]{16})?" # span_id + "-?([01])?" # sampled + "[ \t]*$" # whitespace +) + + +class EnvironHeaders(Mapping): # type: ignore + def __init__( + self, + environ, # type: typing.Mapping[str, str] + prefix="HTTP_", # type: str + ): + # type: (...) -> None + self.environ = environ + self.prefix = prefix + + def __getitem__(self, key): + # type: (str) -> Optional[Any] + return self.environ[self.prefix + key.replace("-", "_").upper()] + + def __len__(self): + # type: () -> int + return sum(1 for _ in iter(self)) + + def __iter__(self): + # type: () -> Generator[str, None, None] + for k in self.environ: + if not isinstance(k, str): + continue + + k = k.replace("-", "_").upper() + if not k.startswith(self.prefix): + continue + + yield k[len(self.prefix) :] + + +def is_valid_sample_rate(rate): + # type: (Any) -> bool + """ + Checks the given sample rate to make sure it is valid type and value (a + boolean or a number between 0 and 1, inclusive). + """ + + # both booleans and NaN are instances of Real, so a) checking for Real + # checks for the possibility of a boolean also, and b) we have to check + # separately for NaN + if not isinstance(rate, Real) or math.isnan(rate): + logger.warning( + "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format( + rate=rate, type=type(rate) + ) + ) + return False + + # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False + rate = float(rate) + if rate < 0 or rate > 1: + logger.warning( + "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format( + rate=rate + ) + ) + return False + + return True + + +@contextlib.contextmanager +def record_sql_queries( + hub, # type: sentry_sdk.Hub + cursor, # type: Any + query, # type: Any + params_list, # type: Any + paramstyle, # type: Optional[str] + executemany, # type: bool +): + # type: (...) -> Generator[Span, None, None] + + # TODO: Bring back capturing of params by default + if hub.client and hub.client.options["_experiments"].get( + "record_sql_params", False + ): + if not params_list or params_list == [None]: + params_list = None + + if paramstyle == "pyformat": + paramstyle = "format" + else: + params_list = None + paramstyle = None + + query = _format_sql(cursor, query) + + data = {} + if params_list is not None: + data["db.params"] = params_list + if paramstyle is not None: + data["db.paramstyle"] = paramstyle + if executemany: + data["db.executemany"] = True + + with capture_internal_exceptions(): + hub.add_breadcrumb(message=query, category="query", data=data) + + with hub.start_span(op="db", description=query) as span: + for k, v in data.items(): + span.set_data(k, v) + yield span + + +def maybe_create_breadcrumbs_from_span(hub, span): + # type: (sentry_sdk.Hub, Span) -> None + if span.op == "redis": + hub.add_breadcrumb( + message=span.description, type="redis", category="redis", data=span._tags + ) + elif span.op == "http": + hub.add_breadcrumb(type="http", category="httplib", data=span._data) + elif span.op == "subprocess": + hub.add_breadcrumb( + type="subprocess", + category="subprocess", + message=span.description, + data=span._data, + ) + + +def _format_sql(cursor, sql): + # type: (Any, str) -> Optional[str] + + real_sql = None + + # If we're using psycopg2, it could be that we're + # looking at a query that uses Composed objects. Use psycopg2's mogrify + # function to format the query. We lose per-parameter trimming but gain + # accuracy in formatting. + try: + if hasattr(cursor, "mogrify"): + real_sql = cursor.mogrify(sql) + if isinstance(real_sql, bytes): + real_sql = real_sql.decode(cursor.connection.encoding) + except Exception: + real_sql = None + + return real_sql or to_string(sql) diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py index 672110ada2..758b4be2da 100644 --- a/tests/tracing/test_sampling.py +++ b/tests/tracing/test_sampling.py @@ -3,7 +3,8 @@ import pytest from sentry_sdk import Hub, start_span, start_transaction -from sentry_sdk.tracing import Transaction, _is_valid_sample_rate +from sentry_sdk.tracing import Transaction +from sentry_sdk.tracing_utils import is_valid_sample_rate from sentry_sdk.utils import logger try: @@ -56,7 +57,7 @@ def test_no_double_sampling(sentry_init, capture_events): ) def test_accepts_valid_sample_rate(rate): with mock.patch.object(logger, "warning", mock.Mock()): - result = _is_valid_sample_rate(rate) + result = is_valid_sample_rate(rate) assert logger.warning.called is False assert result is True @@ -77,7 +78,7 @@ def test_accepts_valid_sample_rate(rate): ) def test_warns_on_invalid_sample_rate(rate, StringContaining): # noqa: N803 with mock.patch.object(logger, "warning", mock.Mock()): - result = _is_valid_sample_rate(rate) + result = is_valid_sample_rate(rate) logger.warning.assert_any_call(StringContaining("Given sample rate is invalid")) assert result is False