OpenSearch: HTTP proxy support for plugin installation by viren-nadkarni · Pull Request #11723 · localstack/localstack · GitHub
[go: up one dir, main page]

Skip to content

OpenSearch: HTTP proxy support for plugin installation #11723

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions localstack-core/localstack/services/opensearch/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
from localstack.services.opensearch import versions
from localstack.utils.archives import download_and_extract_with_retry
from localstack.utils.files import chmod_r, load_file, mkdir, rm_rf, save_file
from localstack.utils.java import (
java_system_properties_proxy,
java_system_properties_ssl,
system_properties_to_cli_args,
)
from localstack.utils.run import run
from localstack.utils.ssl import create_ssl_cert, install_predefined_cert_if_available
from localstack.utils.sync import SynchronizedDefaultDict, retry
Expand Down Expand Up @@ -65,7 +70,6 @@ def _install(self, target: InstallTarget):
tmp_archive = os.path.join(
config.dirs.cache, f"localstack.{os.path.basename(opensearch_url)}"
)
print(f"DEBUG: installing opensearch to path {install_dir_parent}")
download_and_extract_with_retry(opensearch_url, tmp_archive, install_dir_parent)
opensearch_dir = glob.glob(os.path.join(install_dir_parent, "opensearch*"))
if not opensearch_dir:
Expand All @@ -85,14 +89,27 @@ def _install(self, target: InstallTarget):
# install other default plugins for opensearch 1.1+
# https://forum.opensearch.org/t/ingest-attachment-cannot-be-installed/6494/12
if parsed_version >= "1.1.0":
# Determine network configuration to use for plugin downloads
sys_props = {
**java_system_properties_proxy(),
**java_system_properties_ssl(
os.path.join(install_dir, "jdk", "bin", "keytool"),
{"JAVA_HOME": os.path.join(install_dir, "jdk")},
),
}
java_opts = system_properties_to_cli_args(sys_props)

for plugin in OPENSEARCH_PLUGIN_LIST:
plugin_binary = os.path.join(install_dir, "bin", "opensearch-plugin")
plugin_dir = os.path.join(install_dir, "plugins", plugin)
if not os.path.exists(plugin_dir):
LOG.info("Installing OpenSearch plugin %s", plugin)

def try_install():
output = run([plugin_binary, "install", "-b", plugin])
output = run(
[plugin_binary, "install", "-b", plugin],
env_vars={"OPENSEARCH_JAVA_OPTS": " ".join(java_opts)},
)
LOG.debug("Plugin installation output: %s", output)

# We're occasionally seeing javax.net.ssl.SSLHandshakeException -> add download retries
Expand Down Expand Up @@ -241,6 +258,16 @@ def _install(self, target: InstallTarget):
mkdir(dir_path)
chmod_r(dir_path, 0o777)

# Determine network configuration to use for plugin downloads
sys_props = {
**java_system_properties_proxy(),
**java_system_properties_ssl(
os.path.join(install_dir, "jdk", "bin", "keytool"),
{"JAVA_HOME": os.path.join(install_dir, "jdk")},
),
}
java_opts = system_properties_to_cli_args(sys_props)

# install default plugins
for plugin in ELASTICSEARCH_PLUGIN_LIST:
plugin_binary = os.path.join(install_dir, "bin", "elasticsearch-plugin")
Expand All @@ -249,7 +276,10 @@ def _install(self, target: InstallTarget):
LOG.info("Installing Elasticsearch plugin %s", plugin)

def try_install():
output = run([plugin_binary, "install", "-b", plugin])
output = run(
[plugin_binary, "install", "-b", plugin],
env_vars={"ES_JAVA_OPTS": " ".join(java_opts)},
)
LOG.debug("Plugin installation output: %s", output)

# We're occasionally seeing javax.net.ssl.SSLHandshakeException -> add download retries
Expand Down
120 changes: 120 additions & 0 deletions localstack-core/localstack/utils/java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""
Utilities related to Java runtime.
"""

import logging
from os import environ
from urllib.parse import urlparse

from localstack import config
from localstack.utils.files import new_tmp_file, rm_rf
from localstack.utils.run import run

LOG = logging.getLogger(__name__)


#
# Network
#


def java_system_properties_proxy() -> dict[str, str]:
"""
Returns Java system properties for network proxy settings as per LocalStack configuration.

See: https://docs.oracle.com/javase/8/docs/technotes/guides/net/proxies.html
"""
props = {}

for scheme, default_port, var in [
("http", "80", config.OUTBOUND_HTTP_PROXY),
("https", "443", config.OUTBOUND_HTTPS_PROXY),
]:
if var:
netloc = urlparse(var).netloc
url = netloc.split(":")
if len(url) == 2:
hostname, port = url
else:
hostname, port = url[0], default_port

props[f"{scheme}.proxyHost"] = hostname
props[f"{scheme}.proxyPort"] = port
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest renaming var to what it really is (proxy_url?).
And parsing the urls is always a bit complicated (I guess because the URL spec is complicated?), but urlparse should do the hostname and port parsing for you already:

Suggested change
for scheme, default_port, var in [
("http", "80", config.OUTBOUND_HTTP_PROXY),
("https", "443", config.OUTBOUND_HTTPS_PROXY),
]:
if var:
netloc = urlparse(var).netloc
url = netloc.split(":")
if len(url) == 2:
hostname, port = url
else:
hostname, port = url[0], default_port
props[f"{scheme}.proxyHost"] = hostname
props[f"{scheme}.proxyPort"] = port
for scheme, default_port, proxy_url in [
("http", "80", config.OUTBOUND_HTTP_PROXY),
("https", "443", config.OUTBOUND_HTTPS_PROXY),
]:
if proxy_url:
parsed_proxy_url = urlparse(proxy_url)
props[f"{scheme}.proxyHost"] = parsed_proxy_url.hostname
props[f"{scheme}.proxyPort"] = parsed_proxy_url.port

If the URL does not have a port, parsed_proxy_url.port will be None

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, it's slightly simpler now with 6eeb953


return props


#
# SSL
#


def build_trust_store(
keytool_path: str, pem_bundle_path: str, env_vars: dict[str, str], store_passwd: str
) -> str:
"""
Build a TrustStore in JKS format from a PEM certificate bundle.

:param keytool_path: path to the `keytool` binary.
:param pem_bundle_path: path to the PEM bundle.
:param env_vars: environment variables passed during `keytool` execution. This should contain JAVA_HOME and other relevant variables.
:param store_passwd: store password to use.
:return: path to the truststore file.
"""
store_path = new_tmp_file(suffix=".jks")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: I don't know how expensive these operations are, and if this is worth it, but could we cache this in config.dirs.tmp (but that depends on what the store depends on). Right now we are generating the same trust store for the installation of every single plugin, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, currently it's one trust store used for a given version of Opensearch/Elasticsearch installation, reused for all its plugins.

The JRE, and thus the keytool executable, changes based on the Opensearch/Elasticsearch version (we use the bundled JRE), so it may make sense not to re-use the trust store across versions.

rm_rf(store_path)

LOG.debug("Building JKS trust store for %s at %s", pem_bundle_path, store_path)
cmd = f"{keytool_path} -importcert -trustcacerts -alias localstack -file {pem_bundle_path} -keystore {store_path} -storepass {store_passwd} -noprompt"
run(cmd, env_vars=env_vars)

return store_path


def java_system_properties_ssl(keytool_path: str, env_vars: dict[str, str]) -> dict[str, str]:
    """
    Build the Java system properties that make a JVM trust the configured CA bundle.

    The CA bundle location is read from the ``REQUESTS_CA_BUNDLE`` environment variable.
    When it is unset or empty, no properties are produced and no trust store is built.

    See https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CustomizingStores

    :param keytool_path: path to the `keytool` binary, forwarded to `build_trust_store`.
    :param env_vars: environment variables forwarded to `build_trust_store`.
    :return: mapping of system property names to values; empty if no CA bundle is configured.
    """
    ca_bundle = environ.get("REQUESTS_CA_BUNDLE")
    if not ca_bundle:
        return {}

    # The same password is used to create the store and to tell the JVM how to open it.
    password = "localstack"
    trust_store = build_trust_store(keytool_path, ca_bundle, env_vars, password)

    return {
        "javax.net.ssl.trustStore": trust_store,
        "javax.net.ssl.trustStorePassword": password,
        "javax.net.ssl.trustStoreType": "jks",
    }


#
# Other
#


def system_properties_to_cli_args(properties: dict[str, str]) -> list[str]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: It's a pity you can't set system properties via environment variables, otherwise this could be easily integrated generally for our Java packages by adding the properties to JavaInstallerMixin.get_java_env_vars.
ElasticSearch and OpenSearch don't use the Java installation from the package installer but their own. Still, what do you think it would take to get the proxy supported for all backends using Java in the future?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have the package/installer abstractions which handle the installation aspect. Personally I think we should look into introducing some abstractions for the execution/runtime aspect. Because strictly speaking, determining and setting the proxy config falls in that area. We have the JavaInstallerMixin which kind of blurs these responsibilities. But clearly demarcating the responsibilities would let us harmonise things just like the installers.

Already there are some common patterns that have emerged, e.g. KinesisServerManager which handle multiple instances of Kinesis-Mock to allow account/region namespacing, MqttBrokerManager — same for Mosquitto, etc. It just needs fleshing out. I'll put this in the backlog.

"""
Convert a dict of Java system properties to a list of CLI arguments.

e.g.::

{
'java.sys.foo': 'bar',
'java.sys.lorem': 'ipsum'
}

returns::

[
'-Djava.sys.foo=bar',
'-Djava.sys.lorem=ipsum',
]
"""
args = []

for arg_name, arg_value in properties.items():
args.append(f"-D{arg_name}={arg_value}")

return args
65 changes: 65 additions & 0 deletions tests/unit/utils/test_java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from unittest.mock import MagicMock

from localstack import config
from localstack.utils import java


def test_java_system_properties_proxy(monkeypatch):
    """Check that outbound proxy config options are converted into the expected Java sys props."""
    # Each case: (OUTBOUND_HTTP_PROXY, OUTBOUND_HTTPS_PROXY, expected system properties)
    cases = [
        # Only the HTTP proxy is set, with an explicit port
        (
            "http://lorem.com:69",
            "",
            {"http.proxyHost": "lorem.com", "http.proxyPort": "69"},
        ),
        # Only the HTTPS proxy is set, without a port -> 443 is expected
        (
            "",
            "http://ipsum.com",
            {"https.proxyHost": "ipsum.com", "https.proxyPort": "443"},
        ),
        # Ensure no explicit port defaults to 80
        (
            "http://baz.com",
            "http://qux.com:42",
            {
                "http.proxyHost": "baz.com",
                "http.proxyPort": "80",
                "https.proxyHost": "qux.com",
                "https.proxyPort": "42",
            },
        ),
    ]

    for http_proxy, https_proxy, expected in cases:
        monkeypatch.setattr(config, "OUTBOUND_HTTP_PROXY", http_proxy)
        monkeypatch.setattr(config, "OUTBOUND_HTTPS_PROXY", https_proxy)
        assert java.java_system_properties_proxy() == expected


def test_java_system_properties_ssl(monkeypatch):
    """Check the SSL system properties with and without a configured CA bundle."""
    # Replace the trust store builder so that `keytool` is never actually invoked
    trust_store_builder = MagicMock(return_value="/baz/qux")
    monkeypatch.setattr(java, "build_trust_store", trust_store_builder)

    # Ensure that no sys props are returned if CA bundle is not set
    monkeypatch.delenv("REQUESTS_CA_BUNDLE", raising=False)

    assert java.java_system_properties_ssl("/path/keytool", {"enable_this": "true"}) == {}
    trust_store_builder.assert_not_called()

    # Ensure that expected sys props are returned when CA bundle is set
    trust_store_builder.reset_mock()
    monkeypatch.setenv("REQUESTS_CA_BUNDLE", "/foo/bar")

    result = java.java_system_properties_ssl("/path/to/keytool", {"disable_this": "true"})
    assert result == {
        "javax.net.ssl.trustStore": "/baz/qux",
        "javax.net.ssl.trustStorePassword": "localstack",
        "javax.net.ssl.trustStoreType": "jks",
    }
    trust_store_builder.assert_called_with(
        "/path/to/keytool", "/foo/bar", {"disable_this": "true"}, "localstack"
    )


def test_system_properties_to_cli_args():
    """Check that system property dicts are rendered as `-Dname=value` CLI arguments."""
    # Each case: (input properties, expected CLI arguments in insertion order)
    expectations = [
        ({}, []),
        ({"foo": "bar"}, ["-Dfoo=bar"]),
        ({"foo": "bar", "baz": "qux"}, ["-Dfoo=bar", "-Dbaz=qux"]),
    ]

    for properties, cli_args in expectations:
        assert java.system_properties_to_cli_args(properties) == cli_args
Loading
0