Attempt to add logic to skip an image/attachment if API call fails by pchang388 · Pull Request #46 · homeylab/bookstack-file-exporter · GitHub
[go: up one dir, main page]

Skip to content

Attempt to add logic to skip an image/attachment if API call fails #46

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"features": {
"ghcr.io/devcontainers/features/python:1": {
"installTools": true,
"version": "3.12.4"
"version": "3.13.0"
}
},
"customizations": {
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/python/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ runs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.12.4'
python-version: '3.13.0'
- name: Install Dependencies
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/tests/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ runs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.12.4'
python-version: '3.13.0'
- name: Install dependencies
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG BASE_IMAGE=python
ARG BASE_IMAGE_TAG=3.12.4-slim-bookworm
ARG BASE_IMAGE_TAG=3.13.0-slim-bookworm

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}

Expand Down
15 changes: 14 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## DOCKER BUILD VARS
BASE_IMAGE=python
BASE_IMAGE_TAG=3.12.4-slim-bookworm
BASE_IMAGE_TAG=3.13.0-slim-bookworm
IMAGE_NAME=homeylab/bookstack-file-exporter
# keep this start sequence unique (IMAGE_TAG=)
# github actions will use this to create a tag
Expand All @@ -19,6 +19,9 @@ build:
python -m pip install --upgrade build
python -m build

lint:
pylint bookstack_file_exporter

upload_testpypi:
python -m pip install --upgrade twine
python -m twine upload --repository testpypi dist/*
Expand All @@ -27,6 +30,16 @@ upload_testpypi:
download_testpypi:
python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple bookstack-file-exporter

docker_build_simple:
docker build \
--build-arg BASE_IMAGE=${BASE_IMAGE} \
--build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} \
--build-arg DOCKER_WORK_DIR=${DOCKER_WORK_DIR} \
--build-arg DOCKER_CONFIG_DIR=${DOCKER_CONFIG_DIR} \
--build-arg DOCKER_EXPORT_DIR=${DOCKER_EXPORT_DIR} \
-t ${IMAGE_NAME}:${IMAGE_TAG} \
--no-cache .

docker_build:
docker buildx build \
--platform linux/amd64,linux/arm64 \
Expand Down
7 changes: 4 additions & 3 deletions bookstack_file_exporter/archiver/asset_archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class AssetNode:
AssetNode instance for use in other classes
"""
def __init__(self, meta_data: Dict[str, int | str | bool]):
self.id: int = meta_data['id']
self.id_: int = meta_data['id']
self.page_id: int = meta_data['uploaded_to']
self.url: str = ""
self.name: str = ""
Expand Down Expand Up @@ -88,7 +88,7 @@ class AttachmentNode(AssetNode):
def __init__(self, meta_data: Dict[str, Union[int, str, bool]],
base_url: str):
super().__init__(meta_data)
self.url: str = f"{base_url}/{self.id}"
self.url: str = f"{base_url}/{self.id_}"
self.name = meta_data['name']
log.debug("Attachment node has generated url: %s", self.url)
self._relative_path_prefix = f"{_ATTACHMENT_DIR_NAME}"
Expand Down Expand Up @@ -140,7 +140,7 @@ def get_asset_nodes(self, asset_type: str) -> Dict[str, ImageNode | AttachmentNo
def get_asset_data(self, asset_type: str,
meta_data: Union[AttachmentNode, ImageNode]) -> Dict[str, str | bool | int | dict]:
"""Get asset data based on type"""
data_url = f"{self.api_urls[asset_type]}/{meta_data.id}"
data_url = f"{self.api_urls[asset_type]}/{meta_data.id_}"
asset_data_response: Response = common_util.http_get_request(
data_url,
self._headers,
Expand All @@ -164,6 +164,7 @@ def update_asset_links(self, asset_type, page_name: str, page_data: bytes,
asset_nodes: List[ImageNode | AttachmentNode]) -> bytes:
"""update markdown links in page data"""
for asset_node in asset_nodes:
# get metadata instead of raw data/bytes
asset_data = self.get_asset_data(asset_type, asset_node)
asset_node.set_markdown_content(asset_data)
if not asset_node.markdown_str:
Expand Down
42 changes: 34 additions & 8 deletions bookstack_file_exporter/archiver/page_archiver.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from typing import Union, List, Dict

import logging
# pylint: disable=import-error
from requests.exceptions import HTTPError
from bookstack_file_exporter.exporter.node import Node
from bookstack_file_exporter.archiver import util as archiver_util
from bookstack_file_exporter.archiver.asset_archiver import AssetArchiver, ImageNode, AttachmentNode
from bookstack_file_exporter.config_helper.config_helper import ConfigNode

log = logging.getLogger(__name__)

_META_FILE_SUFFIX = "_meta.json"
_TAR_SUFFIX = ".tar"
_TAR_GZ_SUFFIX = ".tgz"
Expand Down Expand Up @@ -70,6 +74,19 @@ def archive_pages(self, page_nodes: Dict[int, Node]):
page_images = image_nodes[page.id_]
if page.id_ in attachment_nodes:
page_attachments = attachment_nodes[page.id_]
failed_images = self.archive_page_assets("images", page.parent.file_path,
page.name, page_images)
failed_attach = self.archive_page_assets("attachments", page.parent.file_path,
page.name, page_attachments)
# exclude from page_images
# so it doesn't attempt to get modified in markdown file
if failed_images:
page_images = [img for img in page_images if img.id_ not in failed_images]
# exclude from page_attachments
# so it doesn't attempt to get modified in markdown file
if failed_attach:
page_attachments = [attach for attach in page_attachments
if attach.id_ not in failed_attach]
for export_format in self.export_formats:
page_data = self._get_page_data(page.id_, export_format)
if page_images and export_format == 'markdown':
Expand All @@ -80,10 +97,6 @@ def archive_pages(self, page_nodes: Dict[int, Node]):
page_data, page_attachments)
self._archive_page(page, export_format,
page_data)
self.archive_page_assets("images", page.parent.file_path,
page.name, page_images)
self.archive_page_assets("attachments", page.parent.file_path,
page.name, page_attachments)
if self.asset_config.export_meta:
self._archive_page_meta(page.file_path, page.meta)

Expand Down Expand Up @@ -123,15 +136,28 @@ def _modify_markdown(self, asset_type: str,
asset_nodes)

def archive_page_assets(self, asset_type: str, parent_path: str, page_name: str,
asset_nodes: List[ImageNode | AttachmentNode]):
asset_nodes: List[ImageNode | AttachmentNode]) -> Dict[int, int]:
"""pull images locally into a directory based on page"""
if not asset_nodes:
return
return {}
# use a map for faster lookup
failed_assets = {}
node_base_path = f"{self.archive_base_path}/{parent_path}/"
for asset_node in asset_nodes:
asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url)
try:
asset_data = self.asset_archiver.get_asset_bytes(asset_type, asset_node.url)
except HTTPError:
# probably unnecessary, but just in case
if asset_node.id_ not in failed_assets:
failed_assets[asset_node.id_] = 0
# a 404 or other error occurred
# skip this asset
log.error("Failed to get image or attachment data " \
"for asset located at: %s - skipping", asset_node.url)
continue
asset_path = f"{node_base_path}/{asset_node.get_relative_path(page_name)}"
self.write_data(asset_path, asset_data)
return failed_assets

def write_data(self, file_path: str, data: bytes):
"""write data to a tar file
Expand Down
1 change: 1 addition & 0 deletions bookstack_file_exporter/exporter/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(self, meta: Dict[str, Union[str, int]],
# for convenience/usage for exporter
# self.name: str = self.meta['slug']
self.name = self.get_name(self.meta['slug'], self.meta['name'])
# id() is a built-in function and should not be used as a variable name
self.id_: int = self.meta['id']
self._display_name = self.meta['name']
# children
Expand Down
1 change: 1 addition & 0 deletions bookstack_file_exporter/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,5 @@ def exporter(args: argparse.Namespace):
# clean up the .tgz archive since it is already uploaded
archive.clean_up()

log.info("Created file archive: %s.tgz", archive.archive_dir)
log.info("Completed run")
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ classifiers =
[options]
python_requires = >=3.8
install_requires =
Pyyaml >= 6.0.1 # https://pypi.org/project/PyYAML/
Pydantic >= 2.8.2 # https://docs.pydantic.dev/latest/
Pyyaml >= 6.0.2 # https://pypi.org/project/PyYAML/
Pydantic >= 2.9.2 # https://docs.pydantic.dev/latest/
requests >= 2.32.3 # https://pypi.org/project/requests/
minio >= 7.2.7 # https://pypi.org/project/minio/
minio >= 7.2.10 # https://pypi.org/project/minio/
packages = find:

[options.entry_points]
Expand Down
Loading
0