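"""Run sphinx-build, optionally as a Bazel persistent worker.

In worker mode, input digests from each work request are compared against
the previous request so Sphinx can rebuild only changed docs.
"""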
import contextlib
import io
import json
import logging
import os
import shutil
import sys
import traceback
import typing

import sphinx.application
from sphinx.cmd.build import main

# Loose aliases for the JSON dicts exchanged over the Bazel worker protocol.
WorkRequest = object
WorkResponse = object

logger = logging.getLogger("sphinxdocs_build")

_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext"

# Config value name for getting the path to the request info file
_REQUEST_INFO_CONFIG_NAME = "bazel_worker_request_info_path"


class Worker:
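    """Persistent worker that runs Sphinx builds for Bazel.

    Reads JSON work requests from the input stream, runs sphinx-build for
    each, and writes JSON work responses to the output stream.
    """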

    def __init__(
        self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str
    ):
        # NOTE: Sphinx performs its own logging re-configuration, so any
        # logging config we do isn't respected by Sphinx. Controlling where
        # stdout and stderr go is the main mechanism. Recall that
        # Bazel sends worker stderr to the worker log file:
        #   outputBase=$(bazel info output_base)
        #   find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
        logging.basicConfig(level=logging.WARN)
        logger.info("Initializing worker")
        # The directory that paths are relative to.
        self._exec_root = exec_root
        # Where requests are read from.
        self._instream = instream
        # Where responses are written to.
        self._outstream = outstream

        # dict[str srcdir, dict[str path, str digest]]
        self._digests = {}

        # Internal output directories the worker gives to Sphinx that need
        # to be cleaned up upon exit.
        # set[str path]
        self._worker_outdirs = set()
        self._extension = BazelWorkerExtension()

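        # Register the extension object as if it were an importable module so
        # Sphinx can load it by name alongside its builtin extensions.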
        sys.modules[_WORKER_SPHINX_EXT_MODULE_NAME] = self._extension
        sphinx.application.builtin_extensions += (_WORKER_SPHINX_EXT_MODULE_NAME,)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        for worker_outdir in self._worker_outdirs:
            shutil.rmtree(worker_outdir, ignore_errors=True)

    def run(self) -> None:
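        """Process work requests until the input stream closes."""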
        logger.info("Worker started")
        try:
            while True:
                request = None
                try:
                    request = self._get_next_request()
                    if request is None:
                        logger.info("Empty request: exiting")
                        break
                    response = self._process_request(request)
                    if response:
                        self._send_response(response)
                except Exception:
                    logger.exception("Unhandled error: request=%s", request)
                    request_id = 0 if not request else request.get("requestId", 0)
                    output = (
                        f"Unhandled error:\nRequest id: {request_id}\n"
                        + traceback.format_exc()
                    )
                    self._send_response(
                        {
                            "exitCode": 3,
                            "output": output,
                            "requestId": request_id,
                        }
                    )
        finally:
            logger.info("Worker shutting down")

    def _get_next_request(self) -> "object | None":
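        """Read the next JSON work request from the input stream.

        Each request is a single line of JSON, roughly of the shape
        (illustrative): {"requestId": 1, "arguments": [...],
        "inputs": [{"path": ..., "digest": ...}]}
        """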
        line = self._instream.readline()
        if not line:
            return None
        return json.loads(line)

    def _send_response(self, response: "WorkResponse") -> None:
        self._outstream.write(json.dumps(response) + "\n")
        self._outstream.flush()

    def _prepare_sphinx(self, request):
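        """Prepare Sphinx arguments and state for a work request.

        Computes which inputs changed since the last request for this srcdir,
        redirects output to a worker-private directory, and records the
        request info where the Sphinx extension can find it.
        """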
        sphinx_args = request["arguments"]
        srcdir = sphinx_args[0]

        incoming_digests = {}
        current_digests = self._digests.setdefault(srcdir, {})
        changed_paths = []
        request_info = {"exec_root": self._exec_root, "inputs": request["inputs"]}
        for entry in request["inputs"]:
            path = entry["path"]
            digest = entry["digest"]
            # Make the path srcdir-relative so Sphinx understands it.
            path = path.removeprefix(srcdir + "/")
            incoming_digests[path] = digest

            if path not in current_digests:
                logger.info("path %s new", path)
                changed_paths.append(path)
            elif current_digests[path] != digest:
                logger.info("path %s changed", path)
                changed_paths.append(path)

        self._digests[srcdir] = incoming_digests
        self._extension.changed_paths = changed_paths
        request_info["changed_sources"] = changed_paths

        bazel_outdir = sphinx_args[1]
        worker_outdir = bazel_outdir + ".worker-out.d"
        self._worker_outdirs.add(worker_outdir)
        sphinx_args[1] = worker_outdir

        request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json")
        with open(request_info_path, "w") as fp:
            json.dump(request_info, fp)
        sphinx_args.append(f"--define={_REQUEST_INFO_CONFIG_NAME}={request_info_path}")

        return worker_outdir, bazel_outdir, sphinx_args

    @contextlib.contextmanager
    def _redirect_streams(self):
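        """Temporarily capture sys.stdout into a StringIO buffer."""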
        out = io.StringIO()
        orig_stdout = sys.stdout
        try:
            sys.stdout = out
            yield out
        finally:
            sys.stdout = orig_stdout

    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
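        """Run Sphinx for a work request and return the response (None if canceled)."""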
        logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2))
        if request.get("cancel"):
            return None

        worker_outdir, bazel_outdir, sphinx_args = self._prepare_sphinx(request)

        # Prevent anything from going to stdout because it breaks the worker
        # protocol. We have limited control over where Sphinx sends output.
        with self._redirect_streams() as stdout:
            logger.info("main args: %s", sphinx_args)
            exit_code = main(sphinx_args)

        if exit_code:
            raise Exception(
                "Sphinx main() returned failure: "
                + f"exit code: {exit_code}\n"
                + "========== STDOUT START ==========\n"
                + stdout.getvalue().rstrip("\n")
                + "\n"
                + "========== STDOUT END ==========\n"
            )

        # Copying is unfortunately necessary because Bazel doesn't know to
        # implicitly bring along what the symlinks point to.
        shutil.copytree(worker_outdir, bazel_outdir, dirs_exist_ok=True)

        response = {
            "requestId": request.get("requestId", 0),
            "output": stdout.getvalue(),
            "exitCode": 0,
        }
        return response


class BazelWorkerExtension:
    """A Sphinx extension implemented as a class acting like a module."""

    def __init__(self):
        # Make it look like a Module object
        self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME
        # Srcdir-relative paths that changed in the current request.
        self.changed_paths = set()

    def setup(self, app):
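        """Standard Sphinx extension setup hook."""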
        app.add_config_value(_REQUEST_INFO_CONFIG_NAME, "", "")
        app.connect("env-get-outdated", self._handle_env_get_outdated)
        return {"parallel_read_safe": True, "parallel_write_safe": True}

    def _handle_env_get_outdated(self, app, env, added, changed, removed):
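        """Report docs whose Bazel input digests changed as outdated."""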
        changed = {
            # NOTE: path2doc returns None if it's not a doc path
            env.path2doc(p)
            for p in self.changed_paths
        }

        logger.info("changed docs: %s", changed)
        return changed


def _worker_main(stdin, stdout, exec_root):
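    """Run the worker loop against the given streams and exec root."""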
    with Worker(stdin, stdout, exec_root) as worker:
        return worker.run()


def _non_worker_main():
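    """Run sphinx-build directly, expanding any @file params files into argv."""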
    args = []
    for arg in sys.argv:
        if arg.startswith("@"):
            with open(arg.removeprefix("@")) as fp:
                lines = [line.strip() for line in fp if line.strip()]
            args.extend(lines)
        else:
            args.append(arg)
    sys.argv[:] = args
    return main()


if __name__ == "__main__":
    if "--persistent_worker" in sys.argv:
        sys.exit(_worker_main(sys.stdin, sys.stdout, os.getcwd()))
    else:
        sys.exit(_non_worker_main())