|
| 1 | +from typing import Union, List, Dict |
| 2 | +# pylint: disable=import-error |
| 3 | +from requests import Response |
| 4 | +from re import sub as re_sub |
| 5 | +import logging |
| 6 | +import base64 |
| 7 | + |
| 8 | +from bookstack_file_exporter.common import util as common_util |
| 9 | + |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Path prefixes assigned to ImageNode / AttachmentNode; they become the first
# segment of the path returned by AssetNode.get_relative_path().
_IMAGE_DIR_NAME = "images"
_ATTACHMENT_DIR_NAME = "attachments"
| 14 | + |
| 15 | + |
class AssetNode:
    """Base helper describing one asset (image or attachment) tied to a page.

    Args:
        meta_data: asset metadata; the keys ``'id'``, ``'uploaded_to'`` and
            ``'url'`` are required (a missing key raises ``KeyError``).

    Attributes set here:
        id: asset id.
        page_id: value of ``meta_data['uploaded_to']``.
        url: asset url as given in the metadata.
        name: last ``/``-separated segment of ``url``.
    """

    def __init__(self, meta_data: Dict[str, Union[int, str, bool]]):
        self.id: int = meta_data['id']
        self.page_id: int = meta_data['uploaded_to']
        self.url: str = meta_data['url']
        self.name: str = self.url.split('/')[-1]
        self._markdown_str: str = ""
        # subclasses overwrite this with their asset directory name
        self._relative_path_prefix: str = ""

    def get_relative_path(self, page_name: str) -> str:
        """Return ``<prefix>/<page_name>/<asset name>`` for this asset."""
        return f"{self._relative_path_prefix}/{page_name}/{self.name}"

    @property
    def markdown_str(self) -> str:
        """Return the markdown url string to replace ("" until it is set)."""
        return self._markdown_str

    def set_markdown_content(self, asset_data: Dict[str, Union[int, str, bool, dict]]) -> None:
        """Extract the markdown url from ``asset_data`` and store it on the node."""
        self._markdown_str = self._get_md_url_str(asset_data)

    @staticmethod
    def _get_md_url_str(asset_data: Dict[str, Union[int, str, bool, dict]]) -> str:
        """Return the url inside ``asset_data['content']['markdown']``.

        The value is expected to be a markdown link such as ``![name](url)``.
        Returns "" when the keys are missing, the value is empty, or no
        parenthesised target can be found.
        """
        content = asset_data.get('content')
        if not isinstance(content, dict):
            return ""
        md_link = content.get('markdown')
        if not md_link:
            return ""
        # Anchor on "](" so parentheses inside the link text are not mistaken
        # for the url delimiters; fall back to the first "(" otherwise.
        anchor = md_link.find("](")
        start = anchor + 2 if anchor != -1 else md_link.find("(") + 1
        # last ")" closes the link target
        end = md_link.rfind(")")
        # start == 0 means no "(" was found (find() returned -1)
        if start == 0 or end < start:
            return ""
        return md_link[start:end]
| 50 | + |
class ImageNode(AssetNode):
    """Image asset; relative paths are rooted at the images directory name."""

    def __init__(self, meta_data: Dict[str, Union[int, str]]):
        super().__init__(meta_data)
        log.debug(self.url)
        # the constant is already a str, so it is assigned as-is
        self._relative_path_prefix = _IMAGE_DIR_NAME
| 56 | + |
class AttachmentNode(AssetNode):
    """Attachment asset; relative paths are rooted at the attachments directory name.

    Args:
        meta_data: attachment metadata; the keys ``'id'``, ``'uploaded_to'``
            and ``'name'`` are required.
        base_url: url the attachment id is appended to in order to build ``url``.
    """

    def __init__(self, meta_data: Dict[str, Union[int, str, bool]],
                 base_url: str):
        # NOTE(review): the parent initializer is not called; it reads
        # meta_data['url'], while here the url is built from base_url and the
        # id - presumably attachment metadata has no 'url' key, confirm.
        self.id: int = meta_data['id']
        self.page_id: int = meta_data['uploaded_to']
        self.url: str = f"{base_url}/{self.id}"
        log.debug(self.url)
        self.name = meta_data['name']
        self._markdown_str = ""
        self._relative_path_prefix = _ATTACHMENT_DIR_NAME

    @staticmethod
    def _get_md_url_str(asset_data: Dict[str, Union[int, str, dict]]) -> str:
        """Return the url inside ``asset_data['links']['markdown']``.

        The value is expected to be a markdown link such as ``[name](url)``.
        Returns "" when the keys are missing, the value is empty, or no
        parenthesised target can be found.
        """
        links = asset_data.get('links')
        if not isinstance(links, dict):
            return ""
        md_link = links.get('markdown')
        if not md_link:
            return ""
        # Anchor on "](" so parentheses inside the link text are not mistaken
        # for the url delimiters; fall back to the first "(" otherwise.
        anchor = md_link.find("](")
        start = anchor + 2 if anchor != -1 else md_link.find("(") + 1
        # last ")" closes the link target
        end = md_link.rfind(")")
        # start == 0 means no "(" was found (find() returned -1)
        if start == 0 or end < start:
            return ""
        return md_link[start:end]
| 80 | + |
class AssetArchiver:
    """Fetch image/attachment assets over HTTP and rewrite page links to local paths.

    Args:
        urls: API url per asset type; the keys ``'images'`` and
            ``'attachments'`` are read by this class.
        headers: HTTP headers sent with every request.
        verify_ssl: forwarded to the HTTP helper on every request.
    """

    def __init__(self, urls: Dict[str, str], headers: Dict[str, str],
                 verify_ssl: bool):
        self.api_urls = urls
        self.verify_ssl = verify_ssl
        self._headers = headers
        # asset type -> builder that groups the listing response by page id
        self._asset_map = {
            'images': self._create_image_map,
            'attachments': self._create_attachment_map
        }

    def get_asset_nodes(self, asset_type: str) -> Dict[int, List["AssetNode"]]:
        """Get image or attachment helpers, grouped by page id.

        Raises:
            KeyError: if ``asset_type`` has no configured url or builder.
        """
        asset_response: Response = common_util.http_get_request(
            self.api_urls[asset_type],
            self._headers,
            self.verify_ssl)
        asset_json = asset_response.json()['data']
        return self._asset_map[asset_type](asset_json)

    def get_asset_data(self, asset_type: str,
                       meta_data: Union["AttachmentNode", "ImageNode"]
                       ) -> Dict[str, Union[str, bool, int, dict]]:
        """Get the detail record of one asset (``<api url>/<asset id>``)."""
        data_url = f"{self.api_urls[asset_type]}/{meta_data.id}"
        asset_data_response: Response = common_util.http_get_request(
            data_url,
            self._headers,
            self.verify_ssl)
        return asset_data_response.json()

    def get_asset_bytes(self, asset_type: str, url: str) -> bytes:
        """Get raw asset data.

        Images are returned as the raw response body; attachments are read
        from the base64 ``'content'`` field of the JSON response.

        Raises:
            ValueError: if ``asset_type`` is neither ``"images"`` nor
                ``"attachments"`` (checked before any request is made).
        """
        if asset_type not in ("images", "attachments"):
            raise ValueError(f"unsupported asset type: {asset_type!r}")
        asset_response: Response = common_util.http_get_request(
            url,
            self._headers,
            self.verify_ssl)
        if asset_type == "images":
            return asset_response.content
        return self.decode_attachment_data(asset_response.json()['content'])

    def update_asset_links(self, asset_type, page_name: str, page_data: bytes,
                           asset_nodes: List[Union["ImageNode", "AttachmentNode"]]) -> bytes:
        """Update markdown links in page data to point at local relative paths.

        Performs one detail request per node to learn its markdown url; nodes
        without one are skipped. Every occurrence of the url is replaced.
        """
        for asset_node in asset_nodes:
            asset_data = self.get_asset_data(asset_type, asset_node)
            asset_node.set_markdown_content(asset_data)
            if not asset_node.markdown_str:
                continue
            # Literal replacement: a url is not a regex, and characters such
            # as "." "?" "+" "(" would otherwise be treated as metacharacters.
            page_data = page_data.replace(
                asset_node.markdown_str.encode(),
                asset_node.get_relative_path(page_name).encode())
        return page_data

    @staticmethod
    def _create_image_map(json_data: List[Dict[str, Union[str, int, bool, dict]]]
                          ) -> Dict[int, List["ImageNode"]]:
        """Build ``{page_id: [ImageNode, ...]}`` from a list of image metadata dicts."""
        image_page_map: Dict[int, List["ImageNode"]] = {}
        for img_meta in json_data:
            img_node = ImageNode(img_meta)
            image_page_map.setdefault(img_node.page_id, []).append(img_node)
        return image_page_map

    def _create_attachment_map(self,
                               json_data: List[Dict[str, Union[str, int, bool, dict]]]
                               ) -> Dict[int, List["AttachmentNode"]]:
        """Build ``{page_id: [AttachmentNode, ...]}``, skipping external links."""
        asset_nodes: Dict[int, List["AttachmentNode"]] = {}
        for asset_meta in json_data:
            if asset_meta['external']:
                continue  # skip external link, only get attachments
            asset_node = AttachmentNode(asset_meta, self.api_urls['attachments'])
            asset_nodes.setdefault(asset_node.page_id, []).append(asset_node)
        return asset_nodes

    @staticmethod
    def decode_attachment_data(b64encoded_data: str) -> bytes:
        """Decode base64 encoded data."""
        return base64.b64decode(b64encoded_data.encode())
0 commit comments