8000 Do not eagerly fetch content when a stream was requested · tableau/server-client-python@4266120 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4266120

Browse files
committed
Do not eagerly fetch content when a stream was requested
1 parent 890d8e7 commit 4266120

File tree

6 files changed

+89
-61
lines changed

6 files changed

+89
-61
lines changed

tableauserverclient/helpers/strings.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import requests
2-
31
from defusedxml.ElementTree import fromstring, tostring
42
from functools import singledispatch
53
from typing import TypeVar
@@ -11,24 +9,6 @@
119
T = TypeVar("T", str, bytes)
1210

1311

14-
# TODO: ideally this would be in the logging config
15-
def safe_to_log(server_response: requests.Response) -> str:
16-
"""Checks if the server_response content is not xml (eg binary image or zip)
17-
and replaces it with a constant"""
18-
ALLOWED_CONTENT_TYPES = ("application/xml", "application/xml;charset=utf-8")
19-
if server_response.headers.get("Content-Type", None) not in ALLOWED_CONTENT_TYPES:
20-
return "[Truncated File Contents]"
21-
22-
""" Check to determine if the response is a text response (xml or otherwise)
23-
so that we do not attempt to log bytes and other binary data. """
24-
if not server_response.content or not server_response.encoding:
25-
return ""
26-
# max length 1000
27-
loggable_response: str = server_response.content.decode(server_response.encoding)[:1000]
28-
redacted_response: str = redact_xml(loggable_response)
29-
return redacted_response
30-
31-
3212
# usage: _redact_any_type("<xml workbook password= cooliothesecond>")
3313
# -> b"<xml workbook password =***************">
3414
def _redact_any_type(xml: T, sensitive_word: T, replacement: T, encoding=None) -> T:

tableauserverclient/models/view_item.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import copy
2-
from typing import Callable, Iterable, List, Optional, Set, TYPE_CHECKING
2+
from typing import Callable, Generator, Iterator, List, Optional, Set, TYPE_CHECKING
33

44
from defusedxml.ElementTree import fromstring
55

@@ -24,8 +24,8 @@ def __init__(self) -> None:
2424
self._preview_image: Optional[Callable[[], bytes]] = None
2525
self._project_id: Optional[str] = None
2626
self._pdf: Optional[Callable[[], bytes]] = None
27-
self._csv: Optional[Callable[[], Iterable[bytes]]] = None
28-
self._excel: Optional[Callable[[], Iterable[bytes]]] = None
27+
self._csv: Optional[Callable[[], Iterator[bytes]]] = None
28+
self._excel: Optional[Callable[[], Iterator[bytes]]] = None
2929
self._total_views: Optional[int] = None
3030
self._sheet_type: Optional[str] = None
3131
self._updated_at: Optional["datetime"] = None
@@ -94,14 +94,14 @@ def pdf(self) -> bytes:
9494
return self._pdf()
9595

9696
@property
97-
def csv(self) -> Iterable[bytes]:
97+
def csv(self) -> Iterator[bytes]:
9898
if self._csv is None:
9999
error = "View item must be populated with its csv first."
100100
raise UnpopulatedPropertyError(error)
101101
return self._csv()
102102

103103
@property
104-
def excel(self) -> Iterable[bytes]:
104+
def excel(self) -> Iterator[bytes]:
105105
if self._excel is None:
106106
error = "View item must be populated with its excel first."
107107
raise UnpopulatedPropertyError(error)

tableauserverclient/server/endpoint/endpoint.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import requests
12
import logging
23
from distutils.version import LooseVersion as Version
34
from functools import wraps
45
from xml.etree.ElementTree import ParseError
6+
from typing import Any, Callable, Dict, Optional, TYPE_CHECKING
57

68
from .exceptions import (
79
ServerResponseError,
@@ -19,9 +21,13 @@
1921
XML_CONTENT_TYPE = "text/xml"
2022
JSON_CONTENT_TYPE = "application/json"
2123

24+
if TYPE_CHECKING:
25+
from ..server import Server
26+
from requests import Response
27+
2228

2329
class Endpoint(object):
24-
def __init__(self, parent_srv):
30+
def __init__(self, parent_srv: "Server"):
2531
self.parent_srv = parent_srv
2632

2733
@staticmethod
@@ -36,13 +42,13 @@ def _make_common_headers(auth_token, content_type):
3642

3743
def _make_request(
3844
self,
39-
method,
40-
url,
41-
content=None,
42-
auth_token=None,
43-
content_type=None,
44-
parameters=None,
45-
):
45+
method: Callable[..., "Response"],
46+
url: str,
47+
content: Optional[bytes] = None,
48+
auth_token: Optional[str] = None,
49+
content_type: Optional[str] = None,
50+
parameters: Optional[Dict[str, Any]] = None,
51+
) -> "Response":
4652
parameters = parameters or {}
4753
parameters.update(self.parent_srv.http_options)
4854
if "headers" not in parameters:
@@ -58,18 +64,20 @@ def _make_request(
5864

5965
server_response = method(url, **parameters)
6066
self._check_status(server_response)
61-
if server_response.headers.get("Content-Type") == "application/octet-stream":
62-
logger.debug("Server response from {0} was of type application/octet-stream".format(url))
63-
else:
64-
loggable_response = helpers.strings.safe_to_log(server_response)
67+
68+
loggable_response = self.log_response_safely(server_response)
69+
logger.debug("Server response from {0}:\n\t{1}".format(url, loggable_response))
70+
71+
if content_type == "application/xml":
6572
self.parent_srv._namespace.detect(server_response.content)
66-
logger.debug("Server response from {0}:\n\t{1}".format(url, loggable_response))
73+
6774
return server_response
6875

6976
def _check_status(self, server_response):
7077
if server_response.status_code >= 500:
7178
raise InternalServerError(server_response)
7279
elif server_response.status_code not in Success_codes:
80+
# todo: is an error reliably of content-type application/xml?
7381
try:
7482
raise ServerResponseError.from_response(server_response.content, self.parent_srv.namespace)
7583
except ParseError:
@@ -81,6 +89,22 @@ def _check_status(self, server_response):
8189
except Exception:
8290
# anything else re-raise here
8391
raise
92+
93+
94+
def log_response_safely(self, server_response: requests.Response) -> str:
95+
# Checking the content type header prevents eager evaluation of streaming requests.
96+
content_type = server_response.headers.get("Content-Type")
97+
98+
# Response.content is a property. Calling it will load the entire response into memory. Checking if the
99+
# content-type is an octet-stream accomplishes the same goal without eagerly loading content.
100+
# This check is to determine if the response is a text response (xml or otherwise)
101+
# so that we do not attempt to log bytes and other binary data.
102+
loggable_response = "Content type {}".format(content_type)
103+
if content_type == "application/octet-stream":
104+
loggable_response = "A stream of type {} [Truncated File Contents]".format(content_type)
105+
elif server_response.encoding and len(server_response.content) > 0:
106+
loggable_response = helpers.strings.redact_xml(server_response.content.decode(server_response.encoding))
107+
return loggable_response
84108

85109
def get_unauthenticated_request(self, url):
86110
return self._make_request(self.parent_srv.session.get, url)

tableauserverclient/server/endpoint/views_endpoint.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
logger = logging.getLogger("tableau.endpoint.views")
1111

12-
from typing import Iterable, List, Optional, Tuple, TYPE_CHECKING
12+
from typing import Iterator, List, Optional, Tuple, TYPE_CHECKING
1313

1414
if TYPE_CHECKING:
1515
from ..request_options import RequestOptions, CSVRequestOptions, PDFRequestOptions, ImageRequestOptions
@@ -119,12 +119,11 @@ def csv_fetcher():
119119
view_item._set_csv(csv_fetcher)
120120
logger.info("Populated csv for view (ID: {0})".format(view_item.id))
121121

122-
def _get_view_csv(self, view_item: ViewItem, req_options: Optional["CSVRequestOptions"]) -> Iterable[bytes]:
122+
def _get_view_csv(self, view_item: ViewItem, req_options: Optional["CSVRequestOptions"]) -> Iterator[bytes]:
123123
url = "{0}/{1}/data".format(self.baseurl, view_item.id)
124124

125125
with closing(self.get_request(url, request_object=req_options, parameters={"stream": True})) as server_response:
126-
csv = server_response.iter_content(1024)
127-
return csv
126+
yield from server_response.iter_content(1024)
128127

129128
@api(version="3.8")
130129
def populate_excel(self, view_item: ViewItem, req_options: Optional["CSVRequestOptions"] = None) -> None:
@@ -138,12 +137,11 @@ def excel_fetcher():
138137
view_item._set_excel(excel_fetcher)
139138
logger.info("Populated excel for view (ID: {0})".format(view_item.id))
140139

141-
def _get_view_excel(self, view_item: ViewItem, req_options: Optional["CSVRequestOptions"]) -> Iterable[bytes]:
140+
def _get_view_excel(self, view_item: ViewItem, req_options: Optional["CSVRequestOptions"]) -> Iterator[bytes]:
142141
url = "{0}/{1}/crosstab/excel".format(self.baseurl, view_item.id)
143142

144143
with closing(self.get_request(url, request_object=req_options, parameters={"stream": True})) as server_response:
145-
excel = server_response.iter_content(1024)
146-
return excel
144+
yield from server_response.iter_content(1024)
147145

148146
@api(version="3.2")
149147
def populate_permissions(self, item: ViewItem) -> None:

test/test_endpoint.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from pathlib import Path
2+
import unittest
3+
4+
import tableauserverclient as TSC
5+
6+
import requests_mock
7+
8+
ASSETS = Path(__file__).parent / "assets"
9+
10+
11+
class TestEndpoint(unittest.TestCase):
12+
def setUp(self) -> None:
13+
self.server = TSC.Server("http://test/", use_server_version=False)
14+
15+
# Fake signin
16+
self.server._site_id = "dad65087-b08b-4603-af4e-2887b8aafc67"
17+
self.server._auth_token = "j80k54ll2lfMZ0tv97mlPvvSCRyD0DOM"
18+
19+
return super().setUp()
20+
21+
def test_get_request_stream(self) -> None:
22+
url = "http://test/"
23+
endpoint = TSC.server.Endpoint(self.server)
24+
with requests_mock.mock() as m:
25+
m.get(url, headers={"Content-Type": "application/octet-stream"})
26+
response = endpoint.get_request(url, parameters={"stream": True})
27+
28+
self.assertFalse(response._content_consumed)
29+
30+
31+
def test_binary_log_truncated(self):
32+
class FakeResponse(object):
33+
34+
headers = {"Content-Type": "application/octet-stream"}
35+
content = b"\x1337" * 1000
36+
status_code = 200
37+
38+
endpoint = TSC.server.Endpoint(self.server)
39+
server_response = FakeResponse()
40+
log = endpoint.log_response_safely(server_response)
41+
self.assertTrue(log.find("[Truncated File Contents]") > 0, log)

test/test_regression_tests.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import unittest
2-
import pytest
3-
import sys
42

53
try:
64
from unittest import mock
75
except ImportError:
86
import mock # type: ignore[no-redef]
97

108
import tableauserverclient.server.request_factory as factory
11-
from tableauserverclient.models.workbook_item import WorkbookItem
12-
from tableauserverclient.helpers.strings import redact_xml, safe_to_log
9+
from tableauserverclient.helpers.strings import redact_xml
1310
from tableauserverclient.filesys_helpers import to_filename, make_download_path
1411

1512

@@ -19,18 +16,6 @@ def test_empty_request_works(self):
1916
self.assertEqual(b"<tsRequest />", result)
2017

2118

22-
class BugFix273(unittest.TestCase):
23-
def test_binary_log_truncated(self):
24-
class FakeResponse(object):
25-
26-
headers = {"Content-Type": "application/octet-stream"}
27-
content = b"\x1337" * 1000
28-
status_code = 200
29-
30-
server_response = FakeResponse()
31-
self.assertEqual(safe_to_log(server_response), "[Truncated File Contents]")
32-
33-
3419
class FileSysHelpers(unittest.TestCase):
3520
def test_to_filename(self):
3621
invalid = [

0 commit comments

Comments
 (0)
0