From bb0243a0afb4b2ad6488cf6f20cd9306d534f45b Mon Sep 17 00:00:00 2001
From: Katie Byers <lobsterkatie@gmail.com>
Date: Mon, 14 Dec 2020 18:48:01 -0800
Subject: [PATCH] split tracing utils into separate module

---
 sentry_sdk/integrations/django/__init__.py |   2 +-
 sentry_sdk/integrations/sqlalchemy.py      |   2 +-
 sentry_sdk/integrations/stdlib.py          |   2 +-
 sentry_sdk/tracing.py                      | 175 ++------------------
 sentry_sdk/tracing_utils.py                | 180 +++++++++++++++++++++
 tests/tracing/test_sampling.py             |   7 +-
 6 files changed, 196 insertions(+), 172 deletions(-)
 create mode 100644 sentry_sdk/tracing_utils.py

diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py
index 008dc386bb..4705d2486e 100644
--- a/sentry_sdk/integrations/django/__init__.py
+++ b/sentry_sdk/integrations/django/__init__.py
@@ -9,7 +9,7 @@
 from sentry_sdk.hub import Hub, _should_send_default_pii
 from sentry_sdk.scope import add_global_event_processor
 from sentry_sdk.serializer import add_global_repr_processor
-from sentry_sdk.tracing import record_sql_queries
+from sentry_sdk.tracing_utils import record_sql_queries
 from sentry_sdk.utils import (
     HAS_REAL_CONTEXTVARS,
     CONTEXTVARS_ERROR_MESSAGE,
diff --git a/sentry_sdk/integrations/sqlalchemy.py b/sentry_sdk/integrations/sqlalchemy.py
index 6c8e5eb88e..4b0207f5ec 100644
--- a/sentry_sdk/integrations/sqlalchemy.py
+++ b/sentry_sdk/integrations/sqlalchemy.py
@@ -3,7 +3,7 @@
 from sentry_sdk._types import MYPY
 from sentry_sdk.hub import Hub
 from sentry_sdk.integrations import Integration, DidNotEnable
-from sentry_sdk.tracing import record_sql_queries
+from sentry_sdk.tracing_utils import record_sql_queries
 
 try:
     from sqlalchemy.engine import Engine  # type: ignore
diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py
index 56cece70ac..d645ae7333 100644
--- a/sentry_sdk/integrations/stdlib.py
+++ b/sentry_sdk/integrations/stdlib.py
@@ -6,7 +6,7 @@
 from sentry_sdk.hub import Hub
 from sentry_sdk.integrations import Integration
 from sentry_sdk.scope import add_global_event_processor
-from sentry_sdk.tracing import EnvironHeaders
+from sentry_sdk.tracing_utils import EnvironHeaders
 from sentry_sdk.utils import capture_internal_exceptions, safe_repr
 
 from sentry_sdk._types import MYPY
diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py
index 73531894ef..18c3645cb0 100644
--- a/sentry_sdk/tracing.py
+++ b/sentry_sdk/tracing.py
@@ -1,27 +1,20 @@
-import re
 import uuid
-import contextlib
-import math
 import random
 import time
 
 from datetime import datetime, timedelta
-from numbers import Real
 
 import sentry_sdk
 
-from sentry_sdk.utils import (
-    capture_internal_exceptions,
-    logger,
-    to_string,
+from sentry_sdk.utils import logger
+from sentry_sdk.tracing_utils import (
+    SENTRY_TRACE_REGEX,
+    EnvironHeaders,
+    is_valid_sample_rate,
+    maybe_create_breadcrumbs_from_span,
 )
-from sentry_sdk._compat import PY2
 from sentry_sdk._types import MYPY
 
-if PY2:
-    from collections import Mapping
-else:
-    from collections.abc import Mapping
 
 if MYPY:
     import typing
@@ -35,45 +28,6 @@
 
     from sentry_sdk._types import SamplingContext
 
-_traceparent_header_format_re = re.compile(
-    "^[ \t]*"  # whitespace
-    "([0-9a-f]{32})?"  # trace_id
-    "-?([0-9a-f]{16})?"  # span_id
-    "-?([01])?"  # sampled
-    "[ \t]*$"  # whitespace
-)
-
-
-class EnvironHeaders(Mapping):  # type: ignore
-    def __init__(
-        self,
-        environ,  # type: typing.Mapping[str, str]
-        prefix="HTTP_",  # type: str
-    ):
-        # type: (...) -> None
-        self.environ = environ
-        self.prefix = prefix
-
-    def __getitem__(self, key):
-        # type: (str) -> Optional[Any]
-        return self.environ[self.prefix + key.replace("-", "_").upper()]
-
-    def __len__(self):
-        # type: () -> int
-        return sum(1 for _ in iter(self))
-
-    def __iter__(self):
-        # type: () -> Generator[str, None, None]
-        for k in self.environ:
-            if not isinstance(k, str):
-                continue
-
-            k = k.replace("-", "_").upper()
-            if not k.startswith(self.prefix):
-                continue
-
-            yield k[len(self.prefix) :]
-
 
 class _SpanRecorder(object):
     """Limits the number of spans recorded in a transaction."""
@@ -325,7 +279,7 @@ def from_traceparent(
         if traceparent.startswith("00-") and traceparent.endswith("-00"):
             traceparent = traceparent[3:-3]
 
-        match = _traceparent_header_format_re.match(str(traceparent))
+        match = SENTRY_TRACE_REGEX.match(str(traceparent))
         if match is None:
             return None
 
@@ -422,7 +376,7 @@ def finish(self, hub=None):
         except AttributeError:
             self.timestamp = datetime.utcnow()
 
-        _maybe_create_breadcrumbs_from_span(hub, self)
+        maybe_create_breadcrumbs_from_span(hub, self)
         return None
 
     def to_json(self):
@@ -618,7 +572,7 @@ def _set_initial_sampling_decision(self, sampling_context):
         # Since this is coming from the user (or from a function provided by the
         # user), who knows what we might get. (The only valid values are
         # booleans or numbers between 0 and 1.)
-        if not _is_valid_sample_rate(sample_rate):
+        if not is_valid_sample_rate(sample_rate):
             logger.warning(
                 "[Tracing] Discarding {transaction_description} because of invalid sample rate.".format(
                     transaction_description=transaction_description,
@@ -661,114 +615,3 @@ def _set_initial_sampling_decision(self, sampling_context):
                     sample_rate=float(sample_rate),
                 )
             )
-
-
-def _is_valid_sample_rate(rate):
-    # type: (Any) -> bool
-    """
-    Checks the given sample rate to make sure it is valid type and value (a
-    boolean or a number between 0 and 1, inclusive).
-    """
-
-    # both booleans and NaN are instances of Real, so a) checking for Real
-    # checks for the possibility of a boolean also, and b) we have to check
-    # separately for NaN
-    if not isinstance(rate, Real) or math.isnan(rate):
-        logger.warning(
-            "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format(
-                rate=rate, type=type(rate)
-            )
-        )
-        return False
-
-    # in case rate is a boolean, it will get cast to 1 if it's True and 0 if it's False
-    rate = float(rate)
-    if rate < 0 or rate > 1:
-        logger.warning(
-            "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format(
-                rate=rate
-            )
-        )
-        return False
-
-    return True
-
-
-def _format_sql(cursor, sql):
-    # type: (Any, str) -> Optional[str]
-
-    real_sql = None
-
-    # If we're using psycopg2, it could be that we're
-    # looking at a query that uses Composed objects. Use psycopg2's mogrify
-    # function to format the query. We lose per-parameter trimming but gain
-    # accuracy in formatting.
-    try:
-        if hasattr(cursor, "mogrify"):
-            real_sql = cursor.mogrify(sql)
-            if isinstance(real_sql, bytes):
-                real_sql = real_sql.decode(cursor.connection.encoding)
-    except Exception:
-        real_sql = None
-
-    return real_sql or to_string(sql)
-
-
-@contextlib.contextmanager
-def record_sql_queries(
-    hub,  # type: sentry_sdk.Hub
-    cursor,  # type: Any
-    query,  # type: Any
-    params_list,  # type:  Any
-    paramstyle,  # type: Optional[str]
-    executemany,  # type: bool
-):
-    # type: (...) -> Generator[Span, None, None]
-
-    # TODO: Bring back capturing of params by default
-    if hub.client and hub.client.options["_experiments"].get(
-        "record_sql_params", False
-    ):
-        if not params_list or params_list == [None]:
-            params_list = None
-
-        if paramstyle == "pyformat":
-            paramstyle = "format"
-    else:
-        params_list = None
-        paramstyle = None
-
-    query = _format_sql(cursor, query)
-
-    data = {}
-    if params_list is not None:
-        data["db.params"] = params_list
-    if paramstyle is not None:
-        data["db.paramstyle"] = paramstyle
-    if executemany:
-        data["db.executemany"] = True
-
-    with capture_internal_exceptions():
-        hub.add_breadcrumb(message=query, category="query", data=data)
-
-    with hub.start_span(op="db", description=query) as span:
-        for k, v in data.items():
-            span.set_data(k, v)
-        yield span
-
-
-def _maybe_create_breadcrumbs_from_span(hub, span):
-    # type: (sentry_sdk.Hub, Span) -> None
-    if span.op == "redis":
-        hub.add_breadcrumb(
-            message=span.description, type="redis", category="redis", data=span._tags
-        )
-    elif span.op == "http":
-        hub.add_breadcrumb(type="http", category="httplib", data=span._data)
-    elif span.op == "subprocess":
-        hub.add_breadcrumb(
-            type="subprocess",
-            category="subprocess",
-            message=span.description,
-            data=span._data,
-        )
diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py
new file mode 100644
index 0000000000..aebb5c58d2
--- /dev/null
+++ b/sentry_sdk/tracing_utils.py
@@ -0,0 +1,180 @@
+import re
+import contextlib
+import math
+
+from numbers import Real
+
+import sentry_sdk
+
+from sentry_sdk.utils import (
+    capture_internal_exceptions,
+    logger,
+    to_string,
+)
+from sentry_sdk._compat import PY2
+from sentry_sdk._types import MYPY
+
+if PY2:
+    from collections import Mapping
+else:
+    from collections.abc import Mapping
+
+if MYPY:
+    import typing
+
+    from typing import Generator
+    from typing import Optional
+    from typing import Any
+
+    from sentry_sdk.tracing import Span
+
+
+SENTRY_TRACE_REGEX = re.compile(  # every part is optional, so "" matches too
+    "^[ \t]*"  # leading spaces/tabs
+    "([0-9a-f]{32})?"  # group 1: trace_id (32 lowercase hex digits)
+    "-?([0-9a-f]{16})?"  # group 2: span_id (16 lowercase hex digits)
+    "-?([01])?"  # group 3: sampled flag ("0" or "1")
+    "[ \t]*$"  # trailing spaces/tabs
+)
+
+
+class EnvironHeaders(Mapping):  # type: ignore
+    def __init__(
+        self,
+        environ,  # type: typing.Mapping[str, str]
+        prefix="HTTP_",  # type: str
+    ):
+        # type: (...) -> None
+        self.environ = environ  # kept by reference; only read by this class
+        self.prefix = prefix  # keys without this prefix are hidden from iteration
+
+    def __getitem__(self, key):  # "x-foo" is looked up as prefix + "X_FOO"
+        # type: (str) -> Optional[Any]
+        return self.environ[self.prefix + key.replace("-", "_").upper()]
+
+    def __len__(self):  # counts by iterating, so it walks all of environ
+        # type: () -> int
+        return sum(1 for _ in iter(self))
+
+    def __iter__(self):  # yields normalised names with the prefix removed
+        # type: () -> Generator[str, None, None]
+        for k in self.environ:
+            if not isinstance(k, str):  # non-string keys are skipped
+                continue
+
+            k = k.replace("-", "_").upper()  # same normalisation as __getitem__
+            if not k.startswith(self.prefix):
+                continue
+
+            yield k[len(self.prefix) :]
+
+
+def is_valid_sample_rate(rate):
+    # type: (Any) -> bool
+    """
+    Checks the given sample rate to make sure it is valid type and value (a
+    boolean or a number between 0 and 1, inclusive).
+    """
+
+    # bool is a subclass of int and float("nan") is a float, so both pass the
+    # Real check: booleans are accepted on purpose, but NaN must be rejected
+    # here because it would slip through the range comparison further down
+    if not isinstance(rate, Real) or math.isnan(rate):
+        logger.warning(
+            "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format(
+                rate=rate, type=type(rate)
+            )
+        )
+        return False  # wrong type or NaN
+
+    # float(True) == 1.0 and float(False) == 0.0, so booleans land inside the range
+    rate = float(rate)
+    if rate < 0 or rate > 1:
+        logger.warning(
+            "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format(
+                rate=rate
+            )
+        )
+        return False  # real number, but outside [0, 1]
+
+    return True
+
+
+@contextlib.contextmanager
+def record_sql_queries(
+    hub,  # type: sentry_sdk.Hub
+    cursor,  # type: Any
+    query,  # type: Any
+    params_list,  # type: Any
+    paramstyle,  # type: Optional[str]
+    executemany,  # type: bool
+):
+    # type: (...) -> Generator[Span, None, None]
+
+    # TODO: Bring back capturing of params by default
+    if hub.client and hub.client.options["_experiments"].get(
+        "record_sql_params", False
+    ):
+        if not params_list or params_list == [None]:  # treat "no params" uniformly
+            params_list = None
+
+        if paramstyle == "pyformat":
+            paramstyle = "format"  # reported under the name "format"
+    else:  # opt-in flag off or no client: never record params
+        params_list = None
+        paramstyle = None
+
+    query = _format_sql(cursor, query)  # text used for breadcrumb and span
+
+    data = {}  # only the keys that apply are set; shared by breadcrumb and span
+    if params_list is not None:
+        data["db.params"] = params_list
+    if paramstyle is not None:
+        data["db.paramstyle"] = paramstyle
+    if executemany:
+        data["db.executemany"] = True
+
+    with capture_internal_exceptions():  # presumably swallows SDK errors; confirm
+        hub.add_breadcrumb(message=query, category="query", data=data)
+
+    with hub.start_span(op="db", description=query) as span:
+        for k, v in data.items():
+            span.set_data(k, v)
+        yield span  # the caller's with-body runs while this span is open
+
+
+def maybe_create_breadcrumbs_from_span(hub, span):
+    # type: (sentry_sdk.Hub, Span) -> None
+    if span.op == "redis":  # ops other than redis/http/subprocess add nothing
+        hub.add_breadcrumb(  # redis passes span._tags as data, unlike the others
+            message=span.description, type="redis", category="redis", data=span._tags
+        )
+    elif span.op == "http":  # no message; span._data is the whole payload
+        hub.add_breadcrumb(type="http", category="httplib", data=span._data)
+    elif span.op == "subprocess":
+        hub.add_breadcrumb(
+            type="subprocess",
+            category="subprocess",
+            message=span.description,
+            data=span._data,
+        )
+
+
+def _format_sql(cursor, sql):
+    # type: (Any, str) -> Optional[str]
+
+    real_sql = None  # stays None when the cursor has no mogrify()
+
+    # If we're using psycopg2, it could be that we're
+    # looking at a query that uses Composed objects. Use psycopg2's mogrify
+    # function to format the query. We lose per-parameter trimming but gain
+    # accuracy in formatting.
+    try:
+        if hasattr(cursor, "mogrify"):
+            real_sql = cursor.mogrify(sql)
+            if isinstance(real_sql, bytes):  # decode with the connection's encoding
+                real_sql = real_sql.decode(cursor.connection.encoding)
+    except Exception:  # best effort: any failure falls back to to_string(sql)
+        real_sql = None
+
+    return real_sql or to_string(sql)  # empty mogrify output also falls back
diff --git a/tests/tracing/test_sampling.py b/tests/tracing/test_sampling.py
index 672110ada2..758b4be2da 100644
--- a/tests/tracing/test_sampling.py
+++ b/tests/tracing/test_sampling.py
@@ -3,7 +3,8 @@
 import pytest
 
 from sentry_sdk import Hub, start_span, start_transaction
-from sentry_sdk.tracing import Transaction, _is_valid_sample_rate
+from sentry_sdk.tracing import Transaction
+from sentry_sdk.tracing_utils import is_valid_sample_rate
 from sentry_sdk.utils import logger
 
 try:
@@ -56,7 +57,7 @@ def test_no_double_sampling(sentry_init, capture_events):
 )
 def test_accepts_valid_sample_rate(rate):
     with mock.patch.object(logger, "warning", mock.Mock()):
-        result = _is_valid_sample_rate(rate)
+        result = is_valid_sample_rate(rate)
         assert logger.warning.called is False
         assert result is True
 
@@ -77,7 +78,7 @@ def test_accepts_valid_sample_rate(rate):
 )
 def test_warns_on_invalid_sample_rate(rate, StringContaining):  # noqa: N803
     with mock.patch.object(logger, "warning", mock.Mock()):
-        result = _is_valid_sample_rate(rate)
+        result = is_valid_sample_rate(rate)
         logger.warning.assert_any_call(StringContaining("Given sample rate is invalid"))
         assert result is False