Fix deployment pull file path issue · linsword13/ramble@cff7059 · GitHub
[go: up one dir, main page]

Skip to content

Commit cff7059

Browse files
committed
Fix deployment pull file path issue
This is tied to newer versions of Python, due to a behavior change in `urlunsplit`: python/cpython#85110. That change causes the `join` util from Spack to behave differently: spack/spack#46453. The fix is essentially a copy of the up-to-date `join` util implementation from the current head of Spack.
1 parent a9ac42d commit cff7059

File tree

1 file changed

+25
-184
lines changed

1 file changed

+25
-184
lines changed

lib/ramble/spack/util/url.py

Lines changed: 25 additions & 184 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
Utility functions for parsing, formatting, and manipulating URLs.
88
"""
99

10-
import itertools
11-
import posixpath
1210
import re
1311
import sys
1412

@@ -23,26 +21,6 @@
2321
is_windows = sys.platform == 'win32'
2422

2523

26-
def _split_all(path):
27-
"""Split path into its atomic components.
28-
29-
Returns the shortest list, L, of strings such that posixpath.join(*L) ==
30-
path and posixpath.split(element) == ('', element) for every element in L
31-
except possibly the first. This first element may possibly have the value
32-
of '/'.
33-
"""
34-
result = []
35-
a = path
36-
old_a = None
37-
while a != old_a:
38-
(old_a, (a, b)) = a, posixpath.split(a)
39-
40-
if a or b:
41-
result.insert(0, b or '/')
42-
43-
return result
44-
45-
4624
def local_file_path(url):
4725
"""Get a local file path from a url.
4826
@@ -124,168 +102,31 @@ def format(parsed_url):
124102
return parsed_url.geturl()
125103

126104

127-
def join(base_url, path, *extra, **kwargs):
128-
"""Joins a base URL with one or more local URL path components
129-
130-
If resolve_href is True, treat the base URL as though it where the locator
131-
of a web page, and the remaining URL path components as though they formed
132-
a relative URL to be resolved against it (i.e.: as in posixpath.join(...)).
133-
The result is an absolute URL to the resource to which a user's browser
134-
would navigate if they clicked on a link with an "href" attribute equal to
135-
the relative URL.
136-
137-
If resolve_href is False (default), then the URL path components are joined
138-
as in posixpath.join().
139-
140-
Note: file:// URL path components are not canonicalized as part of this
141-
operation. To canonicalize, pass the joined url to format().
142-
143-
Examples:
144-
base_url = 's3://bucket/index.html'
145-
body = fetch_body(prefix)
146-
link = get_href(body) # link == '../other-bucket/document.txt'
147-
148-
# wrong - link is a local URL that needs to be resolved against base_url
149-
spack.util.url.join(base_url, link)
150-
's3://bucket/other_bucket/document.txt'
151-
152-
# correct - resolve local URL against base_url
153-
spack.util.url.join(base_url, link, resolve_href=True)
154-
's3://other_bucket/document.txt'
155-
156-
prefix = 'https://mirror.spack.io/build_cache'
157-
158-
# wrong - prefix is just a URL prefix
159-
spack.util.url.join(prefix, 'my-package', resolve_href=True)
160-
'https://mirror.spack.io/my-package'
161-
162-
# correct - simply append additional URL path components
163-
spack.util.url.join(prefix, 'my-package', resolve_href=False) # default
164-
'https://mirror.spack.io/build_cache/my-package'
165-
166-
# For canonicalizing file:// URLs, take care to explicitly differentiate
167-
# between absolute and relative join components.
168-
169-
# '$spack' is not an absolute path component
170-
join_result = spack.util.url.join('/a/b/c', '$spack') ; join_result
171-
'file:///a/b/c/$spack'
172-
spack.util.url.format(join_result)
173-
'file:///a/b/c/opt/spack'
174-
175-
# '/$spack' *is* an absolute path component
176-
join_result = spack.util.url.join('/a/b/c', '/$spack') ; join_result
177-
'file:///$spack'
178-
spack.util.url.format(join_result)
179-
'file:///opt/spack'
180-
"""
181-
paths = [
182-
(x) if isinstance(x, str)
183-
else x.geturl()
184-
for x in itertools.chain((base_url, path), extra)]
185-
186-
paths = [convert_to_posix_path(x) for x in paths]
187-
n = len(paths)
188-
last_abs_component = None
189-
scheme = ''
190-
for i in range(n - 1, -1, -1):
191-
obj = urllib.parse.urlparse(
192-
paths[i], scheme='', allow_fragments=False)
193-
194-
scheme = obj.scheme
195-
196-
# in either case the component is absolute
197-
if scheme or obj.path.startswith('/'):
198-
if not scheme:
199-
# Without a scheme, we have to go back looking for the
200-
# next-last component that specifies a scheme.
201-
for j in range(i - 1, -1, -1):
202-
obj = urllib.parse.urlparse(
203-
paths[j], scheme='', allow_fragments=False)
204-
205-
if obj.scheme:
206-
paths[i] = '{SM}://{NL}{PATH}'.format(
207-
SM=obj.scheme,
208-
NL=(
209-
(obj.netloc + '/')
210-
if obj.scheme != 's3' else ''),
211-
PATH=paths[i][1:])
212-
break
213-
214-
last_abs_component = i
215-
break
216-
217-
if last_abs_component is not None:
218-
paths = paths[last_abs_component:]
219-
if len(paths) == 1:
220-
result = urllib.parse.urlparse(
221-
paths[0], scheme='file', allow_fragments=False)
222-
223-
# another subtlety: If the last argument to join() is an absolute
224-
# file:// URL component with a relative path, the relative path
225-
# needs to be resolved.
226-
if result.scheme == 'file' and result.netloc:
227-
result = urllib.parse.ParseResult(
228-
scheme=result.scheme,
229-
netloc='',
230-
path=posixpath.abspath(result.netloc + result.path),
231-
params=result.params,
232-
query=result.query,
233-
fragment=None)
234-
235-
return result.geturl()
236-
237-
return _join(*paths, **kwargs)
238-
239-
240-
def _join(base_url, path, *extra, **kwargs):
241-
base_url = parse(base_url)
242-
resolve_href = kwargs.get('resolve_href', False)
243-
244-
(scheme, netloc, base_path, params, query, _) = base_url
245-
scheme = scheme.lower()
246-
247-
path_tokens = [
248-
part for part in itertools.chain(
249-
_split_all(path),
250-
itertools.chain.from_iterable(
251-
_split_all(extra_path) for extra_path in extra))
252-
if part and part != '/']
253-
254-
base_path_args = ['/fake-root']
255-
if scheme == 's3':
256-
if netloc:
257-
base_path_args.append(netloc)
258-
259-
if base_path.startswith('/'):
260-
base_path = base_path[1:]
261-
262-
base_path_args.append(base_path)
263-
264-
if resolve_href:
265-
new_base_path, _ = posixpath.split(posixpath.join(*base_path_args))
266-
base_path_args = [new_base_path]
267-
268-
base_path_args.extend(path_tokens)
269-
base_path = posixpath.relpath(posixpath.join(*base_path_args), '/fake-root')
270-
271-
if scheme == 's3':
272-
path_tokens = [
273-
part for part in _split_all(base_path)
274-
if part and part != '/']
275-
276-
if path_tokens:
277-
netloc = path_tokens.pop(0)
278-
base_path = posixpath.join('', *path_tokens)
279-
280-
if sys.platform == "win32":
281-
base_path = convert_to_posix_path(base_path)
282-
283-
return format(urllib.parse.ParseResult(scheme=scheme,
284-
netloc=netloc,
285-
path=base_path,
286-
params=params,
287-
query=query,
288-
fragment=None))
105+
def join(base: str, *components: str, resolve_href: bool = False, **kwargs) -> str:
106+
"""Convenience wrapper around ``urllib.parse.urljoin``, with a few differences:
107+
1. By default resolve_href=False, which makes the function like os.path.join: for example
108+
https://example.com/a/b + c/d = https://example.com/a/b/c/d. If resolve_href=True, the
109+
behavior is how a browser would resolve the URL: https://example.com/a/c/d.
110+
2. s3://, gs://, oci:// URLs are joined like http:// URLs.
111+
3. It accepts multiple components for convenience. Note that components[1:] are treated as
112+
literal path components and appended to components[0] separated by slashes."""
113+
# Ensure a trailing slash in the path component of the base URL to get os.path.join-like
114+
# behavior instead of web browser behavior.
115+
if not resolve_href:
116+
parsed = urllib.parse.urlparse(base)
117+
if not parsed.path.endswith("/"):
118+
base = parsed._replace(path=f"{parsed.path}/").geturl()
119+
uses_netloc = urllib.parse.uses_netloc
120+
uses_relative = urllib.parse.uses_relative
121+
try:
122+
# NOTE: we temporarily modify urllib internals so s3 and gs schemes are treated like http.
123+
# This is non-portable, and may be forward incompatible with future cpython versions.
124+
urllib.parse.uses_netloc = [*uses_netloc, "s3", "gs", "oci"]
125+
urllib.parse.uses_relative = [*uses_relative, "s3", "gs", "oci"]
126+
return urllib.parse.urljoin(base, "/".join(components), **kwargs)
127+
finally:
128+
urllib.parse.uses_netloc = uses_netloc
129+
urllib.parse.uses_relative = uses_relative
289130

290131

291132
git_re = (

0 commit comments

Comments
 (0)
0