add new http config input functionality · homeylab/bookstack-file-exporter@843daef · GitHub
[go: up one dir, main page]

Skip to content

Commit 843daef

Browse files
committed
add new http config input functionality
1 parent 41ff93e commit 843daef

File tree

9 files changed

+100
-84
lines changed

9 files changed

+100
-84
lines changed

bookstack_file_exporter/archiver/archiver.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
1010
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
1111
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
12+
from bookstack_file_exporter.common.util import HttpHelper
1213

1314
log = logging.getLogger(__name__)
1415

@@ -22,17 +23,18 @@ class Archiver:
2223
2324
Args:
2425
:config: <ConfigNode> = Configuration with user inputs and general options.
26+
:http_client: <HttpHelper> = http helper functions with config from user inputs
2527
2628
Returns:
2729
Archiver instance with attributes that are accessible
2830
for use for handling bookstack exports and remote uploads.
2931
"""
30-
def __init__(self, config: ConfigNode):
32+
def __init__(self, config: ConfigNode, http_client: HttpHelper):
3133
self.config = config
3234
# for convenience
3335
self.base_dir = config.base_dir_name
3436
self.archive_dir = self._generate_root_folder(self.base_dir)
35-
self._page_archiver = PageArchiver(self.archive_dir, self.config)
37+
self._page_archiver = PageArchiver(self.archive_dir, self.config, http_client)
3638
self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}
3739

3840
def create_export_dir(self):

bookstack_file_exporter/archiver/asset_archiver.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# pylint: disable=import-error
66
from requests import Response
77

8-
from bookstack_file_exporter.common import util as common_util
8+
from bookstack_file_exporter.common.util import HttpHelper
99

1010
log = logging.getLogger(__name__)
1111

@@ -112,47 +112,39 @@ class AssetArchiver:
112112
113113
Args:
114114
:urls: <Dict[str, str]> = api urls for images and attachments
115-
:headers: <Dict[str, str]> = http headers for api requests
116115
:verify_ssl: <bool> = verify ssl for api requests
116+
:http_client: <HttpHelper> = http helper functions with config from user inputs
117117
118118
Returns:
119119
AssetArchiver instance for use in archiving images and attachments for a page
120120
"""
121-
def __init__(self, urls: Dict[str, str], headers: Dict[str, str],
122-
verify_ssl: bool):
121+
def __init__(self, urls: Dict[str, str], http_client: HttpHelper):
123122
self.api_urls = urls
124-
self.verify_ssl = verify_ssl
125-
self._headers = headers
126123
self._asset_map = {
127124
'images': self._create_image_map,
128125
'attachments': self._create_attachment_map
129126
}
127+
self.http_client = http_client
130128

131129
def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNode]:
132130
"""Get image or attachment helpers for a page"""
133-
asset_response: Response = common_util.http_get_request(
134-
self.api_urls[asset_type],
135-
self._headers,
136-
self.verify_ssl)
131+
asset_response: Response = self.http_client.http_get_request(
132+
self.api_urls[asset_type])
137133
asset_json = asset_response.json()['data']
138134
return self._asset_map[asset_type](asset_json)
139135

140136
def get_asset_data(self, asset_type: str,
141137
meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]:
142138
"""Get asset data based on type"""
143139
data_url = f"{self.api_urls[asset_type]}/{meta_data.id_}"
144-
asset_data_response: Response = common_util.http_get_request(
145-
data_url,
146-
self._headers,
147-
self.verify_ssl)
140+
asset_data_response: Response = self.http_client.http_get_request(
141+
data_url)
148142
return asset_data_response.json()
149143

150144
def get_asset_bytes(self, asset_type: str, url: str) -> bytes:
151145
"""Get raw asset data"""
152-
asset_response: Response = common_util.http_get_request(
153-
url,
154-
self._headers,
155-
self.verify_ssl)
146+
asset_response: Response = self.http_client.http_get_request(
147+
url)
156148
match asset_type:
157149
case "images":
158150
asset_data = asset_response.content

bookstack_file_exporter/archiver/page_archiver.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from bookstack_file_exporter.archiver import util as archiver_util
77
from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode
88
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
9+
from bookstack_file_exporter.common.util import HttpHelper
910

1011
log = logging.getLogger(__name__)
1112

@@ -33,26 +34,26 @@ class PageArchiver:
3334
3435
Args:
3536
:archive_dir: <str> = directory where data will be put into.
36-
3737
:config: <ConfigNode> = Configuration with user inputs and general options.
38+
:http_client: <HttpHelper> = http helper functions with config from user inputs
3839
3940
Returns:
4041
:PageArchiver: instance with methods to help collect page content from a Bookstack instance.
4142
"""
42-
def __init__(self, archive_dir: str, config: ConfigNode) -> None:
43+
def __init__(self, archive_dir: str, config: ConfigNode, http_client: HttpHelper) -> None:
4344
self.asset_config = config.user_inputs.assets
4445
self.export_formats = config.user_inputs.formats
4546
self.api_urls = config.urls
46-
self._headers = config.headers
4747
# full path, bookstack-<timestamp>, and .tgz extension
4848
self.archive_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tgz']}"
4949
# name of intermediate tar file before gzip
5050
self.tar_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tar']}"
5151
# name of the base folder to use within the tgz archive (internal tar layout)
5252
self.archive_base_path = archive_dir.split("/")[-1]
5353
self.modify_md: bool = self._check_md_modify()
54-
self.asset_archiver = AssetArchiver(self.api_urls, self._headers,
55-
self.verify_ssl)
54+
self.asset_archiver = AssetArchiver(self.api_urls,
55+
http_client)
56+
self.http_client = http_client
5657

5758
def _check_md_modify(self) -> bool:
5859
# check to ensure they have asset_config defined, could be None
@@ -107,8 +108,8 @@ def _archive_page(self, page: Node, export_format: str, data: bytes):
107108

108109
def _get_page_data(self, page_id: int, export_format: str) -> bytes:
109110
url = f"{self.api_urls['pages']}/{page_id}/{_EXPORT_API_PATH}/{export_format}"
110-
return archiver_util.get_byte_response(url=url, headers=self._headers,
111-
verify_ssl=self.verify_ssl)
111+
return archiver_util.get_byte_response(url=url,
112+
http_client=self.http_client)
112113

113114
def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int]]):
114115
meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}"

bookstack_file_exporter/archiver/util.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
import glob
1010
from pathlib import Path
1111

12-
from bookstack_file_exporter.common import util
12+
from bookstack_file_exporter.common.util import HttpHelper
1313

1414
log = logging.getLogger(__name__)
1515

16-
def get_byte_response(url: str, headers: Dict[str, str], verify_ssl: bool) -> bytes:
16+
def get_byte_response(url: str, http_client: HttpHelper) -> bytes:
1717
"""get byte response from http request"""
18-
response = util.http_get_request(url=url, headers=headers, verify_ssl=verify_ssl)
18+
response = http_client.http_get_request(url=url)
1919
return response.content
2020

2121
# append to a tar file instead of creating files locally and then tar'ing after
Lines changed: 50 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,59 @@
11
import logging
2-
from typing import Dict
2+
from typing import Dict, List
33
# pylint: disable=import-error
44
import requests
55
# pylint: disable=import-error
66
from requests.adapters import HTTPAdapter, Retry
77

8+
from bookstack_file_exporter.config_helper.models import HttpConfig
9+
810
log = logging.getLogger(__name__)
911

10-
def http_get_request(url: str, headers: Dict[str, str],
11-
verify_ssl: bool, timeout: int = 30) -> requests.Response:
12-
"""make http requests and return response object"""
13-
url_prefix = should_verify(url)
14-
try:
15-
with requests.Session() as session:
16-
# {backoff factor} * (2 ** ({number of previous retries}))
17-
# {raise_on_status} if status falls in status_forcelist range
18-
# and retries have been exhausted.
19-
# {status_force_list} 413, 429, 503 defaults are overwritten with additional ones
20-
retries = Retry(total=5,
21-
backoff_factor=0.5,
22-
raise_on_status=True,
23-
status_forcelist=[413, 429, 500, 502, 503, 504])
24-
session.mount(url_prefix, HTTPAdapter(max_retries=retries))
25-
response = session.get(url, headers=headers, verify=verify_ssl, timeout=timeout)
26-
except Exception as req_err:
27-
log.error("Failed to make request for %s", url)
28-
raise req_err
29-
try:
30-
#raise_for_status() throws an exception on codes 400-599
31-
response.raise_for_status()
32-
except requests.exceptions.HTTPError as e:
33-
# this means it either exceeded 50X retries in `http_get_request` handler
34-
# or it returned a 40X which is not expected
35-
log.error("Bookstack request failed with status code: %d on url: %s",
36-
response.status_code, url)
37-
raise e
38-
return response
12+
class HttpHelper:
13+
def __init__(self, headers: Dict[str, str],
14+
config: HttpConfig):
15+
self.backoff_factor = config.backoff_factor
16+
self.retry_codes = config.retry_codes
17+
self.retry_count = config.retry_count
18+
self.http_timeout = config.timeout
19+
self.verify_ssl = config.verify_ssl
20+
self._headers = headers
21+
22+
23+
# more details on options: https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html
24+
def http_get_request(self, url: str) -> requests.Response:
25+
"""make http requests and return response object"""
26+
url_prefix = self.should_verify(url)
27+
try:
28+
with requests.Session() as session:
29+
# {backoff factor} * (2 ** ({number of previous retries}))
30+
# {raise_on_status} if status falls in status_forcelist range
31+
# and retries have been exhausted.
32+
# {status_forcelist} 413, 429, 503 defaults are overwritten with additional ones
33+
retries = Retry(total=self.retry_count,
34+
backoff_factor=self.backoff_factor,
35+
raise_on_status=True,
36+
status_forcelist=self.retry_codes)
37+
session.mount(url_prefix, HTTPAdapter(max_retries=retries))
38+
response = session.get(url, headers=self._headers, verify=self.verify_ssl,
39+
timeout=self.http_timeout)
40+
except Exception as req_err:
41+
log.error("Failed to make request for %s", url)
42+
raise req_err
43+
try:
44+
#raise_for_status() throws an exception on codes 400-599
45+
response.raise_for_status()
46+
except requests.exceptions.HTTPError as e:
47+
# this means it either exceeded 50X retries in `http_get_request` handler
48+
# or it returned a 40X which is not expected
49+
log.error("Bookstack request failed with status code: %d on url: %s",
50+
response.status_code, url)
51+
raise e
52+
return response
3953

40-
def should_verify(url: str) -> str:
41-
"""check if http or https"""
42-
if url.startswith("https"):
43-
return "https://"
44-
return "http://"
54+
@staticmethod
55+
def should_verify(url: str) -> str:
56+
"""check if http or https"""
57+
if url.startswith("https"):
58+
return "https://"
59+
return "http://"

bookstack_file_exporter/config_helper/config_helper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ def _generate_remote_config(self) -> Dict[str, StorageProviderConfig]:
105105
def _generate_headers(self) -> Dict[str, str]:
106106
headers = {}
107107
# add additional_headers provided by user
108-
if self.user_inputs.additional_headers:
109-
for key, value in self.user_inputs.additional_headers.items():
108+
if self.user_inputs.http_config.additional_headers:
109+
for key, value in self.user_inputs.http_config.additional_headers.items():
110110
headers[key] = value
111111

112112
# add default headers

bookstack_file_exporter/config_helper/models.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,26 @@ class Assets(BaseModel):
2626
export_attachments: Optional[bool] = False
2727
modify_markdown: Optional[bool] = False
2828
export_meta: Optional[bool] = False
29-
verify_ssl: Optional[bool] = True
29+
# verify_ssl: Optional[bool] = True
30+
31+
class HttpConfig(BaseModel):
32+
"""YAML schema for user provided http settings"""
33+
verify_ssl: Optional[bool] = False
34+
timeout: Optional[int] = 30
35+
backoff_factor: Optional[float] = 2.5
36+
retry_codes: Optional[List[int]] = [413, 429, 500, 502, 503, 504]
37+
retry_count: Optional[int] = 5
38+
additional_headers: Optional[Dict[str, str]] = {}
3039

3140
# pylint: disable=too-few-public-methods
3241
class UserInput(BaseModel):
3342
"""YAML schema for user provided configuration file"""
3443
host: str
35-
additional_headers: Optional[Dict[str, str]] = None
3644
credentials: Optional[BookstackAccess] = None
3745
formats: List[Literal["markdown", "html", "pdf", "plaintext"]]
3846
output_path: Optional[str] = None
3947
assets: Optional[Assets] = Assets()
4048
minio: Optional[ObjectStorageConfig] = None
4149
keep_last: Optional[int] = None
42-
run_interval: Optional[int] = 0
50+
run_interval: Optional[int] = 0
51+
http_config: Optional[HttpConfig] = HttpConfig()

bookstack_file_exporter/exporter/exporter.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from requests import Response
66

77
from bookstack_file_exporter.exporter.node import Node
8-
from bookstack_file_exporter.common import util
8+
from bookstack_file_exporter.common.util import HttpHelper
99

1010
log = logging.getLogger(__name__)
1111

@@ -19,10 +19,9 @@ class NodeExporter():
1919
Returns:
2020
NodeExporter instance to handle building shelve/book/chapter/page relations.
2121
"""
22-
def __init__(self, api_urls: Dict[str, str], headers: Dict[str,str], verify_ssl: bool):
22+
def __init__(self, api_urls: Dict[str, str], http_client: HttpHelper):
2323
self.api_urls = api_urls
24-
self.headers = headers
25-
self.verify_ssl = verify_ssl
24+
self.http_client = http_client
2625

2726
def get_all_shelves(self) -> Dict[int, Node]:
2827
"""
@@ -38,8 +37,7 @@ def get_all_shelves(self) -> Dict[int, Node]:
3837

3938
def _get_json_response(self, url: str) -> List[Dict[str, Union[str,int]]]:
4039
"""get http response data in json format"""
41-
response: Response = util.http_get_request(url=url, headers=self.headers,
42-
verify_ssl=self.verify_ssl)
40+
response: Response = self.http_client.http_get_request(url=url)
4341
return response.json()
4442

4543
def _get_all_ids(self, url: str) -> List[int]:

bookstack_file_exporter/run.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from bookstack_file_exporter.exporter.node import Node
99
from bookstack_file_exporter.exporter.exporter import NodeExporter
1010
from bookstack_file_exporter.archiver.archiver import Archiver
11+
from bookstack_file_exporter.common.util import HttpHelper
1112

1213
log = logging.getLogger(__name__)
1314

@@ -26,23 +27,21 @@ def entrypoint(args: argparse.Namespace):
2627
def exporter(config: ConfigNode):
2728
"""export bookstack nodes and archive locally and/or remotely"""
2829

29-
## convenience vars
30-
bookstack_headers = config.headers
31-
api_urls = config.urls
32-
unassigned_dir = config.unassigned_book_dir
33-
verify_ssl = config.user_inputs.assets.verify_ssl
34-
3530
#### Export Data #####
3631
# need to implement pagination for apis
3732
log.info("Beginning run")
3833

34+
## Helper functions with user-provided (or default) http config
35+
http_client = HttpHelper(config.headers, config.user_inputs.http_config)
36+
3937
## Use exporter class to get all the resources (pages, books, etc.) and their relationships
4038
log.info("Building shelve/book/chapter/page relationships")
41-
export_helper = NodeExporter(api_urls, bookstack_headers, verify_ssl)
39+
export_helper = NodeExporter(config.urls, http_client)
4240
## shelves
4341
shelve_nodes: Dict[int, Node] = export_helper.get_all_shelves()
4442
## books
45-
book_nodes: Dict[int, Node] = export_helper.get_all_books(shelve_nodes, unassigned_dir)
43+
book_nodes: Dict[int, Node] = export_helper.get_all_books(shelve_nodes,
44+
config.unassigned_book_dir)
4645
## pages
4746
page_nodes: Dict[int, Node] = export_helper.get_all_pages(book_nodes)
4847
if not page_nodes:

0 commit comments

Comments
 (0)
0