8000 chore: Batch reloads runtime (#2986) · nginx/kubernetes-ingress@7c95cd1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7c95cd1

Browse files
authored
chore: Batch reloads runtime (#2986)
* Extend batch reloads to cover multiple items in the queue during runtime * Add performance tests for batch reload * Update reloads doc * Minor test updates * PR feedback
1 parent 57cae26 commit 7c95cd1

12 files changed

+687
-35
lines changed

docs/content/intro/how-nginx-ingress-controller-works.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ A reload involves multiple steps:
315315

316316
The Ingress Controller reloads NGINX every time the Control Loop processes a change that affects the generated NGINX configuration. In general, every time a resource is changed, the Ingress Controller will regenerate the configuration and reload NGINX. A resource could be of any type the Ingress Controller monitors -- see [The Ingress Controller is a Kubernetes Controller](#the-ingress-controller-is-a-kubernetes-controller) section.
317317

318-
There are two special cases:
318+
There are three special cases:
319319
- *Start*. When the Ingress Controller starts, it processes all resources in the cluster and only then reloads NGINX. This avoids creating a "reload storm" by reloading only once.
320+
- *Batch updates*. When the Ingress Controller receives a number of concurrent requests from the Kubernetes API, it will pause NGINX reloads untill the task queue is empty. This reduces the number of reloads to minimize the impact of batch updates and reduce the risk of OOM (Out of Memory) errors.
320321
- *NGINX Plus*. If the Ingress Controller uses NGINX Plus, it will not reload NGINX Plus for changes to the Endpoints resources. In this case, the Ingress Controller will use the NGINX Plus API to update the corresponding upstreams and skip reloading.

internal/configs/configurator.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,6 +1039,11 @@ func (cnf *Configurator) EnableReloads() {
10391039
cnf.isReloadsEnabled = true
10401040
}
10411041

1042+
// DisableReloads disables NGINX reloads meaning that configuration changes will not be followed by a reload.
1043+
func (cnf *Configurator) DisableReloads() {
1044+
cnf.isReloadsEnabled = false
1045+
}
1046+
10421047
func (cnf *Configurator) reload(isEndpointsUpdate bool) error {
10431048
if !cnf.isReloadsEnabled {
10441049
return nil

internal/k8s/controller.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ type LoadBalancerController struct {
166166
configMap *api_v1.ConfigMap
167167
certManagerController *cm_controller.CmController
168168
externalDNSController *ed_controller.ExtDNSController
169+
batchSyncEnabled bool
169170
}
170171

171172
var keyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
@@ -764,6 +765,11 @@ func (lbc *LoadBalancerController) syncConfigMap(task task) {
764765
return
765766
}
766767

768+
if lbc.batchSyncEnabled {
769+
glog.V(3).Infof("Skipping ConfigMap update because batch sync is on")
770+
return
771+
}
772+
767773
lbc.updateAllConfigs()
768774
}
769775

@@ -836,6 +842,12 @@ func (lbc *LoadBalancerController) preSyncSecrets() {
836842
}
837843

838844
func (lbc *LoadBalancerController) sync(task task) {
845+
if lbc.isNginxReady && lbc.syncQueue.Len() > 1 && !lbc.batchSyncEnabled {
846+
lbc.configurator.DisableReloads()
847+
lbc.batchSyncEnabled = true
848+
849+
glog.V(3).Infof("Batch processing %v items", lbc.syncQueue.Len())
850+
}
839851
glog.V(3).Infof("Syncing %v", task.Key)
840852
if lbc.spiffeCertFetcher != nil {
841853
lbc.syncLock.Lock()
@@ -892,6 +904,14 @@ func (lbc *LoadBalancerController) sync(task task) {
892904
lbc.isNginxReady = true
893905
glog.V(3).Infof("NGINX is ready")
894906
}
907+
908+
if lbc.batchSyncEnabled && lbc.syncQueue.Len() == 0 {
909+
lbc.batchSyncEnabled = false
910+
lbc.configurator.EnableReloads()
911+
lbc.updateAllConfigs()
912+
913+
glog.V(3).Infof("Batch sync completed")
914+
}
895915
}
896916

897917
func (lbc *LoadBalancerController) syncIngressLink(task task) {

tests/Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,8 @@ create-kind-cluster: ## Create Kind cluster
7272
.PHONY: delete-kind-cluster
7373
delete-kind-cluster: ## Delete Kind cluster
7474
kind delete cluster
75+
76+
.PHONY: test-lint
77+
test-lint: ## Run Python linting tools
78+
isort .
79+
black .

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66
from kubernetes.config.kube_config import KUBE_CONFIG_DEFAULT_LOCATION
77
from settings import (
8+
BATCH_RELOAD_NUMBER,
89
BATCH_RESOURCES,
910
BATCH_START,
1011
DEFAULT_DEPLOYMENT_TYPE,
@@ -95,6 +96,12 @@ def pytest_addoption(parser) -> None:
9596
default=BATCH_RESOURCES,
9697
help="Number of VS/Ingress resources to deploy",
9798
)
99+
parser.addoption(
100+
"--batch-reload-number",
101+
action="store",
102+
default=BATCH_RELOAD_NUMBER,
103+
help="Number of reloads expected for batch reload test",
104+
)
98105
parser.addoption(
99106
"--ns-count",
100107
action="store",

tests/settings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
"""Settings below are test specific"""
2323
# Determines if batch reload tests will be ran or not
2424
BATCH_START = "False"
25-
# Number of Ingress/VS resources to deploy based on BATCH_START value, used in test_batch_startup_times.py
25+
# Number of Ingress/VS resources to deploy based on BATCH_START value, used in test_batch_startup_times.py and test_batch_reloads.py
2626
BATCH_RESOURCES = 1
27+
# Threshold for batch reloads (reloads for batch requests should be at or below this number). Used in test_batch_reloads.py
28+
BATCH_RELOAD_NUMBER = 2
2729
# Number of namespaces to deploy to measure Pod performance, used in test_multiple_ns_perf.py
2830
NS_COUNT = 0

tests/suite/resources_utils.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,20 +1210,21 @@ def create_items_from_yaml(kube_apis, yaml_manifest, namespace) -> {}:
12101210
with open(yaml_manifest) as f:
12111211
docs = yaml.safe_load_all(f)
12121212
for doc in docs:
1213-
if doc["kind"] == "Secret":
1214-
res["Secret"] = create_secret(kube_apis.v1, namespace, doc)
1215-
elif doc["kind"] == "ConfigMap":
1216-
res["ConfigMap"] = create_configmap(kube_apis.v1, namespace, doc)
1217-
elif doc["kind"] == "Ingress":
1218-
res["Ingress"] = create_ingress(kube_apis.networking_v1, namespace, doc)
1219-
elif doc["kind"] == "Service":
1220-
res["Service"] = create_service(kube_apis.v1, namespace, doc)
1221-
elif doc["kind"] == "Deployment":
1222-
res["Deployment"] = create_deployment(kube_apis.apps_v1_api, namespace, doc)
1223-
elif doc["kind"] == "DaemonSet":
1224-
res["DaemonSet"] = create_daemon_set(kube_apis.apps_v1_api, namespace, doc)
1225-
elif doc["kind"] == "Namespace":
1226-
res["Namespace"] = create_namespace(kube_apis.v1, doc)
1213+
if doc:
1214+
if doc["kind"] == "Secret":
1215+
res["Secret"] = create_secret(kube_apis.v1, namespace, doc)
1216+
elif doc["kind"] == "ConfigMap":
1217+
res["ConfigMap"] = create_configmap(kube_apis.v1, namespace, doc)
1218+
elif doc["kind"] == "Ingress":
1219+
res["Ingress"] = create_ingress(kube_apis.networking_v1, namespace, doc)
1220+
elif doc["kind"] == "Service":
1221+
res["Service"] = create_service(kube_apis.v1, namespace, doc)
1222+
elif doc["kind"] == "Deployment":
1223+
res["Deployment"] = create_deployment(kube_apis.apps_v1_api, namespace, doc)
1224+
elif doc["kind"] == "DaemonSet":
1225+
res["DaemonSet"] = create_daemon_set(kube_apis.apps_v1_api, namespace, doc)
1226+
elif doc["kind"] == "Namespace":
1227+
res["Namespace"] = create_namespace(kube_apis.v1, doc)
12271228

12281229
return res
12291230

@@ -1323,20 +1324,21 @@ def delete_items_from_yaml(kube_apis, yaml_manifest, namespace) -> None:
13231324
with open(yaml_manifest) as f:
13241325
docs = yaml.safe_load_all(f)
13251326
for doc in docs:
1326-
if doc["kind"] == "Namespace":
1327-
delete_namespace(kube_apis.v1, doc["metadata"]["name"])
1328-
elif doc["kind"] == "Secret":
1329-
delete_secret(kube_apis.v1, doc["metadata"]["name"], namespace)
1330-
elif doc["kind"] == "Ingress":
1331-
delete_ingress(kube_apis.networking_v1, doc["metadata"]["name"], namespace)
1332-
elif doc["kind"] == "Service":
1333-
delete_service(kube_apis.v1, doc["metadata"]["name"], namespace)
1334-
elif doc["kind"] == "Deployment":
1335-
delete_deployment(kube_apis.apps_v1_api, doc["metadata"]["name"], namespace)
1336-
elif doc["kind"] == "DaemonSet":
1337-
delete_daemon_set(kube_apis.apps_v1_api, doc["metadata"]["name"], namespace)
1338-
elif doc["kind"] == "ConfigMap":
1339-
delete_configmap(kube_apis.v1, doc["metadata"]["name"], namespace)
1327+
if doc:
1328+
if doc["kind"] == "Namespace":
1329+
delete_namespace(kube_apis.v1, doc["metadata"]["name"])
1330+
elif doc["kind"] == "Secret":
1331+
delete_secret(kube_apis.v1, doc["metadata"]["name"], namespace)
1332+
elif doc["kind"] == "Ingress":
1333+
delete_ingress(kube_apis.networking_v1, doc["metadata"]["name"], namespace)
1334+
elif doc["kind"] == "Service":
1335+
delete_service(kube_apis.v1, doc["metadata"]["name"], namespace)
1336+
elif doc["kind"] == "Deployment":
1337+
delete_deployment(kube_apis.apps_v1_api, doc["metadata"]["name"], namespace)
1338+
elif doc["kind"] == "DaemonSet":
1339+
delete_daemon_set(kube_apis.apps_v1_api, doc["metadata"]["name"], namespace)
1340+
elif doc["kind"] == "ConfigMap":
1341+
delete_configmap(kube_apis.v1, doc["metadata"]["name"], namespace)
13401342

13411343

13421344
def ensure_connection(request_url, expected_code=404, headers={}) -> None:

tests/suite/test_app_protect_integration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ def test_ap_enable_false_policy_correct(
228228
ap_crd_info = read_ap_custom_resource(kube_apis.custom_objects, test_namespace, "appolicies", ap_policy)
229229
assert_ap_crd_info(ap_crd_info, ap_policy)
230230
ensure_response_from_backend(appprotect_setup.req_url, ingress_host, check404=True)
231+
wait_before_test(5)
231232

232233
print("----------------------- Send request ----------------------")
233234
response = requests.get(appprotect_setup.req_url + "/<script>", headers={"host": ingress_host}, verify=False)
@@ -284,6 +285,7 @@ def test_ap_enable_false_policy_incorrect(
284285
ensure_response_from_backend(appprotect_setup.req_url, ingress_host, check404=True)
285286

286287
print("----------------------- Send request ----------------------")
288+
wait_before_test(5)
287289
response = requests.get(appprotect_setup.req_url + "/<script>", headers={"host": ingress_host}, verify=False)
288290
print(response.text)
289291
delete_items_from_yaml(kube_apis, src_ing_yaml, test_namespace)

0 commit comments

Comments
 (0)
0