diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9c41570..eede421 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,3 +9,16 @@ updates: - rchakode reviewers: - rchakode +- package-ecosystem: "docker" + directory: "/" + schedule: + interval: "daily" +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + open-pull-requests-limit: 5 + assignees: + - rchakode + reviewers: + - rchakode diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml deleted file mode 100644 index 313f059..0000000 --- a/.github/workflows/dependabot.yml +++ /dev/null @@ -1,6 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "docker" - directory: "/" - schedule: - interval: "daily" diff --git a/README.md b/README.md index 068f3c4..5208635 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Key features: -* **Consumption hourly trends, daily and monthly accounting per namespace.** This feature provides analytics metrics _tracking both actual usage and requested capacities_ over time. Metrics are namespaced-based, collected every 5 minutes, consolidated on a hourly basis for trends, from which daily and monthly accounting is processed. +* **Hourly consumption trends, daily and monthly accounting per namespace.** This feature provides analytics metrics _tracking both actual usage and requested capacities_ over time. Metrics are namespace-based, collected every 5 minutes, consolidated on an hourly basis for trends, from which daily and monthly accounting is processed. * **Accounting of non-allocatable capacities.** At node and cluster levels, `kube-opex-analytics` tracks and consolidates the share of non-allocatable capacities and highlights them against usable capacities (i.e. capacities used by actual application workloads). In contrary to usable capacities, non-allocatable capacities are dedicated to Kubernetes operations (OS, kubelets, etc). * **Cluster usage accounting and capacity planning.** This feature makes it easy to account and visualize capacities consumed on a cluster, globally, instantly and over time. * **Usage/requests efficiency.** Based on hourly-consolidated trends, this functionality helps know how efficient resource requests set on Kubernetes workloads are, compared against the actual resource usage over time. 
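The accounting pipeline described in the README entry above (5-minute samples, hourly consolidation, then daily and monthly cumulative accounting) can be illustrated with a minimal, hypothetical sketch. The function and variable names are invented for illustration; the real backend persists these series in RRD files rather than in dictionaries.

```python
from collections import defaultdict

def consolidate(samples):
    """Toy consolidation: 'samples' are (epoch_seconds, namespace, cpu) tuples
    collected every 5 minutes. Hourly means feed the trend charts; cumulative
    sums of the hourly values feed the daily/monthly accounting charts."""
    per_hour = defaultdict(list)
    for ts, ns, cpu in samples:
        per_hour[(ts // 3600, ns)].append(cpu)
    hourly_trend = {key: sum(v) / len(v) for key, v in per_hour.items()}
    daily_accounting = defaultdict(float)
    for (hour, ns), mean_cpu in hourly_trend.items():
        daily_accounting[(hour // 24, ns)] += mean_cpu
    return hourly_trend, daily_accounting
```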
diff --git a/backend.py b/backend.py index bbcb9b1..dee0148 100644 --- a/backend.py +++ b/backend.py @@ -18,7 +18,6 @@ import json import logging import os -import sys import threading import time import traceback @@ -45,6 +44,7 @@ def create_directory_if_not_exists(path): + """Create the given directory if it does not exist.""" try: os.makedirs(path) except OSError as e: @@ -53,7 +53,7 @@ class Config: - version = '22.02.3' + version = '22.12.0' db_round_decimals = 6 db_non_allocatable = 'non-allocatable' db_billing_hourly_rate = '.billing-hourly-rate' @@ -66,6 +66,7 @@ class Config: cost_model = os.getenv('KOA_COST_MODEL', 'CUMULATIVE_RATIO') billing_currency = os.getenv('KOA_BILLING_CURRENCY_SYMBOL', '$') enable_debug = (lambda v: v.lower() in ("yes", "true"))(os.getenv('KOA_ENABLE_DEBUG', 'false')) + k8s_auth_token_file = os.getenv('KOA_K8S_AUTH_TOKEN_FILE', '/var/run/secrets/kubernetes.io/serviceaccount/token') k8s_auth_token = os.getenv('KOA_K8S_AUTH_TOKEN', 'NO_ENV_AUTH_TOKEN') k8s_auth_token_type = os.getenv('KOA_K8S_AUTH_TOKEN_TYPE', 'Bearer') k8s_auth_username = os.getenv('KOA_K8S_AUTH_USERNAME', 'NO_ENV_AUTH_USERNAME') @@ -75,29 +76,41 @@ class Config: k8s_ssl_client_cert_key = os.getenv('KOA_K8S_AUTH_CLIENT_CERT_KEY', 'NO_ENV_CLIENT_CERT_CERT') included_namespaces = [i for i in os.getenv('KOA_INCLUDED_NAMESPACES', '').replace(' ', ',').split(',') if i] excluded_namespaces = [i for i in os.getenv('KOA_EXCLUDED_NAMESPACES', '').replace(' ', ',').split(',') if i] - - def __init__(self): - self.load_rbac_auth_token() - - # handle billing rate and cost model + google_api_key = os.getenv('KOA_GOOGLE_API_KEY', 'NO_GOOGLE_API_KEY') + + def process_cost_model_config(self): + cost_model_label = 'cumulative' + cost_model_unit = '%' + if self.cost_model == 'CHARGE_BACK': + cost_model_label = 'costs' + cost_model_unit = self.billing_currency + elif self.cost_model == 'RATIO': + cost_model_label = 'normalized' + cost_model_unit = '%' + return cost_model_label, cost_model_unit + + def process_billing_hourly_rate_config(self): + """Process the KOA_BILLING_HOURLY_RATE config setting.""" try: - self.billing_hourly_rate = float(os.getenv('KOA_BILLING_HOURLY_RATE')) + self.billing_hourly_rate = float(os.getenv('KOA_BILLING_HOURLY_RATE', -1)) except: self.billing_hourly_rate = -1.0 + + def __init__(self): + self.load_rbac_auth_token() + self.process_billing_hourly_rate_config() create_directory_if_not_exists(self.frontend_data_location) + cost_model_label, cost_model_unit = self.process_cost_model_config() with open(str('%s/backend.json' % self.frontend_data_location), 'w') as fd: - if self.cost_model == 'CHARGE_BACK': - cost_model_label = 'costs' - cost_model_unit = self.billing_currency - elif self.cost_model == 'RATIO': - cost_model_label = 'normalized' - cost_model_unit = '%' - else: - cost_model_label = 'cumulative' - cost_model_unit = '%' fd.write('{"cost_model":"%s", "currency":"%s"}' % (cost_model_label, cost_model_unit)) + # check listener port + try: + self.listener_port = int(os.getenv('KOA_LISTENER_PORT')) + except (TypeError, ValueError): + self.listener_port = 5483 + # handle cacert file if applicable if self.k8s_verify_ssl and self.k8s_ssl_cacert and os.path.exists(self.k8s_ssl_cacert): self.koa_verify_ssl_option = self.k8s_ssl_cacert @@ -120,8 +133,9 @@ def allow_namespace(namespace): return no_namespace_included or all_namespaces_enabled or namespace_matched def load_rbac_auth_token(self): + """Load the 
service account token when applicable.""" try: - with open('/var/run/secrets/kubernetes.io/serviceaccount/token', 'r') as rbac_token_file: + with open(KOA_CONFIG.k8s_auth_token_file, 'r', encoding='utf-8') as rbac_token_file: self.k8s_rbac_auth_token = rbac_token_file.read() except: self.k8s_rbac_auth_token = 'NO_ENV_TOKEN_FILE' @@ -140,13 +154,14 @@ def configure_logger(debug_enabled): log_level = logging.DEBUG else: log_level = logging.WARN + logger = logging.getLogger('kube-opex-analytics') logger.setLevel(log_level) - ch = logging.StreamHandler() - ch.setLevel(log_level) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - ch.setFormatter(formatter) - logger.addHandler(ch) + logger_handler = logging.StreamHandler() + logger_handler.setLevel(log_level) + logger_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + logger_handler.setFormatter(logger_formatter) + logger.addHandler(logger_handler) return logger @@ -158,6 +173,8 @@ def configure_logger(debug_enabled): class RrdPeriod(enum.IntEnum): + """RRD consolidation periods, in seconds.""" + PERIOD_5_MINS_SEC = 300 PERIOD_1_HOUR_SEC = 3600 PERIOD_1_DAY_SEC = 86400 @@ -167,8 +184,6 @@ class RrdPeriod(enum.IntEnum): # initialize Prometheus exporter - - PROMETHEUS_HOURLY_USAGE_EXPORTER = prometheus_client.Gauge('koa_namespace_hourly_usage', 'Current hourly resource usage per namespace', ['namespace', 'resource']) @@ -233,10 +248,124 @@ def download_dataset(path): @app.route('/') def render(): - return flask.render_template('index.html', koa_frontend_data_location=KOA_CONFIG.frontend_data_location, + """Render the index.html page based on the Flask template.""" + return flask.render_template('index.html', + koa_frontend_data_location=KOA_CONFIG.frontend_data_location, koa_version=KOA_CONFIG.version) +def get_http_resource_or_return_none_on_error(url): + """Fetch the HTTP resource at the given URL and return the response object, or None on error.""" + req = None + try: + req = requests.get(url, params=None) + except requests.exceptions.Timeout: + KOA_LOGGER.error("Timeout while querying %s", url) + except requests.exceptions.TooManyRedirects: + KOA_LOGGER.error("TooManyRedirects while querying %s", url) + except requests.exceptions.RequestException as ex: + exception_type = type(ex).__name__ + KOA_LOGGER.error("HTTP error (%s) => %s", exception_type, traceback.format_exc()) + + if req is None: + return None + + if req.status_code != 200: + KOA_LOGGER.error("Call to URL %s returned error => %s", url, req.content) + return None + + return req + + +def get_azure_price(node): + """Query the Azure pricing API to compute the node price based on its computing resources (e.g. 
vCPU, RAM).""" + api_base = "https://prices.azure.com/api/retail/prices?$filter=armRegionName" + api_endpoint = "{} eq '{}' and skuName eq '{}' and serviceName eq 'Virtual Machines'".format(api_base, node.region, node.instanceType) # noqa: E501 + + pricing_data = get_http_resource_or_return_none_on_error(api_endpoint) + if pricing_data is None: + return 0.0 + + pricing_json = pricing_data.json() + if pricing_json.get("Count", 0) == 0: + api_endpoint = "{} eq '{}' and skuName eq '{}{}' and serviceName eq 'Virtual Machines'".format(api_base, node.region, node.instanceType[0].lower(), node.instanceType[1:]) # noqa: E501 + + price = 0.0 + while price == 0.0: + pricing_data = get_http_resource_or_return_none_on_error(api_endpoint) + if pricing_data is None: + break + + pricing_json = pricing_data.json() + for item in pricing_json["Items"]: + if node.os == "windows": + if item["type"] == "Consumption" and item["productName"].endswith('Windows'): + price = item.get('unitPrice') + elif node.os == "linux": + if item["type"] == "Consumption" and not item["productName"].endswith('Windows'): + price = item.get('unitPrice') + + api_endpoint = pricing_json.get("NextPageLink", None) + if api_endpoint is None: + break + + return price + + +def gcp_search_price_per_page(node, skus, instance_description): + """Search a page of GCP SKUs for the on-demand price matching the given instance description.""" + price = 0.0 + for sku in skus: + if sku.get("description").startswith(instance_description): + if node.region in sku.get("serviceRegions") and sku["category"]["usageType"] == "OnDemand": + price_info = sku["pricingInfo"][0]["pricingExpression"]["tieredRates"][0] + units = float(price_info["unitPrice"]["units"]) + nanos = float(price_info["unitPrice"]["nanos"]) + price = units + nanos * 1e-9 + + return price + + +def get_gcp_price(node, memory, cpu): + """Query the GCE pricing API to compute the node price based on its computing capacities (e.g. 
vCPU, RAM).""" + cpu_price = 0.0 + memory_price = 0.0 + instance_cpu_desc = node.instanceType[:2].upper() + " Instance Core" + instance_memory_desc = node.instanceType[:2].upper() + " Instance Ram" + + base_api_endpoint = "https://cloudbilling.googleapis.com/v1/services/6F81-5844-456A/skus?key={}".format(KOA_CONFIG.google_api_key) # noqa: E501 + + pricing_data = get_http_resource_or_return_none_on_error(base_api_endpoint) + if pricing_data is None: + return 0.0 + + pricing_json = pricing_data.json() + skus = pricing_json.get('skus', None) + if skus is not None: + cpu_price = cpu * gcp_search_price_per_page(node, skus, instance_cpu_desc) + memory_price = memory * gcp_search_price_per_page(node, skus, instance_memory_desc) + + next_page_token = pricing_json.get('nextPageToken', None) + while next_page_token is not None and next_page_token != "": + api_endpoint = "{}&pageToken={}".format(base_api_endpoint, next_page_token) + + pricing_data = get_http_resource_or_return_none_on_error(api_endpoint) + if pricing_data is None: + break + + pricing_json = pricing_data.json() + skus = pricing_json.get('skus', None) + if skus is not None: + cpu_price += cpu * gcp_search_price_per_page(node, skus, instance_cpu_desc) + memory_price += memory * gcp_search_price_per_page(node, skus, instance_memory_desc) + + if cpu_price != 0.0 and memory_price != 0.0: + break + + next_page_token = pricing_json.get('nextPageToken', None) + + return cpu_price + memory_price + + class Node: def __init__(self): self.id = '' @@ -252,6 +381,12 @@ def __init__(self): self.containerRuntime = '' self.podsRunning = [] self.podsNotRunning = [] + self.region = '' + self.os = '' + self.instanceType = '' + self.aksCluster = None + self.gcpCluster = None + self.hourlyPrice = 0.0 class Pod: @@ -344,6 +479,13 @@ def __init__(self): 'None': 1 } + self.cloudCostAvailable = None + self.hourlyRate = 0.0 + self.managedControlPlanePrice = { + "AKS": 0.10, + "GKE": 0.10 + } + def decode_capacity(self, cap_input): data_length = len(cap_input) cap_unit = 'None' @@ -392,7 +534,6 @@ def extract_nodes(self, data): status = item.get('status', None) if status is not None: node.containerRuntime = status['nodeInfo']['containerRuntimeVersion'] - node.cpuCapacity = self.decode_capacity(status['capacity']['cpu']) node.cpuAllocatable = self.decode_capacity(status['allocatable']['cpu']) node.memCapacity = self.decode_capacity(status['capacity']['memory']) @@ -418,8 +559,29 @@ if cond['type'] == 'DiskPressure' and cond['status'] == 'True': node.state = 'DiskPressure' break + + # check managed cluster settings + node.region = metadata['labels'].get('topology.kubernetes.io/region', '') + node.instanceType = metadata['labels'].get('node.kubernetes.io/instance-type', '') + node.aksCluster = metadata['labels'].get('kubernetes.azure.com/cluster', None) + node.gcpCluster = metadata['labels'].get("cloud.google.com/gke-boot-disk", None) + + # AKS cluster processing + if node.aksCluster is not None: + self.cloudCostAvailable = "AKS" + node.hourlyPrice = get_azure_price(node) + self.hourlyRate += node.hourlyPrice + + # GKE cluster processing + if node.gcpCluster is not None and KOA_CONFIG.google_api_key != "NO_GOOGLE_API_KEY": + self.cloudCostAvailable = "GKE" + node.hourlyPrice = get_gcp_price(node, node.memCapacity * 9.5367431640625e-7, node.cpuCapacity) + self.hourlyRate += node.hourlyPrice + self.nodes[node.name] = node + self.hourlyRate += self.managedControlPlanePrice.get(self.cloudCostAvailable, 0.0) + def extract_node_metrics(self, data): # exit 
if not valid data if data is None: @@ -473,6 +635,7 @@ def extract_pods(self, data): pod.nodeName = item['spec']['nodeName'] pod.cpuRequest = 0.0 pod.memRequest = 0.0 + # TODO: extract initContainers for _, container in enumerate(item.get('spec').get('containers')): resources = container.get('resources', None) @@ -512,6 +675,7 @@ def consolidate_ns_usage(self): self.cpuUsageAllPods = 0.0 self.memUsageAllPods = 0.0 for pod in self.pods.values(): + if pod.nodeName is not None and hasattr(pod, 'cpuUsage') and hasattr(pod, 'memUsage'): self.cpuUsageAllPods += pod.cpuUsage self.memUsageAllPods += pod.memUsage @@ -520,23 +684,19 @@ if ns_pod_usage is not None: ns_pod_usage.cpu += pod.cpuUsage ns_pod_usage.mem += pod.memUsage - ns_pod_request = self.requestByNamespace.get(pod.namespace, None) if ns_pod_request is not None: ns_pod_request.cpu += pod.cpuRequest ns_pod_request.mem += pod.memRequest - pod_node = self.nodes.get(pod.nodeName, None) if pod_node is not None: pod_node.podsRunning.append(pod) - self.cpuCapacity += 0.0 self.memCapacity += 0.0 for node in self.nodes.values(): if hasattr(node, 'cpuCapacity') and hasattr(node, 'memCapacity'): self.cpuCapacity += node.cpuCapacity self.memCapacity += node.memCapacity - self.cpuAllocatable += 0.0 self.memAllocatable += 0.0 for node in self.nodes.values(): @@ -699,7 +859,7 @@ def dump_trend_analytics(dbfiles, category='usage'): fd.write('[' + ','.join(res_usage[1]) + ']') @staticmethod - def dump_histogram_analytics(dbfiles, period): + def dump_histogram_analytics(dbfiles, period, cost_model): """ Dump usage history data. @@ -714,6 +874,7 @@ def dump_histogram_analytics(dbfiles, period): requests_export = collections.defaultdict(list) requests_per_type_date = {} sum_requests_per_type_date = {} + for _, db in enumerate(dbfiles): rrd = Rrd(db_files_location=KOA_CONFIG.db_location, dbname=db) current_periodic_usage = rrd.dump_histogram_data(period=period) @@ -753,15 +914,19 @@ for res, usage_data_bundle in usage_per_type_date.items(): for date_key, db_usage_item in usage_data_bundle.items(): for db, usage_value in db_usage_item.items(): + if db != KOA_CONFIG.db_billing_hourly_rate: usage_cost = round(usage_value, KOA_CONFIG.db_round_decimals) - if KOA_CONFIG.cost_model == 'RATIO' or KOA_CONFIG.cost_model == 'CHARGE_BACK': + + if cost_model == 'RATIO' or cost_model == 'CHARGE_BACK': usage_ratio = usage_value / sum_usage_per_type_date[res][date_key] usage_cost = round(100 * usage_ratio, KOA_CONFIG.db_round_decimals) - if KOA_CONFIG.cost_model == 'CHARGE_BACK': + + if cost_model == 'CHARGE_BACK': usage_cost = round( usage_ratio * usage_per_type_date[res][date_key][KOA_CONFIG.db_billing_hourly_rate], KOA_CONFIG.db_round_decimals) + usage_export[res].append('{"stack":"%s","usage":%f,"date":"%s"}' % (db, usage_cost, date_key)) if Rrd.get_date_group(now_gmtime, period) == date_key: PROMETHEUS_PERIODIC_USAGE_EXPORTERS[period].labels(db, ResUsageType(res).name).set( @@ -772,15 +937,15 @@ - if KOA_CONFIG.cost_model == 'RATIO' or KOA_CONFIG.cost_model == 'CHARGE_BACK': + if cost_model == 'RATIO' or cost_model == 'CHARGE_BACK': req_ratio = req_value / sum_requests_per_type_date[res][date_key] req_cost = round(100 * req_ratio, KOA_CONFIG.db_round_decimals) + - if KOA_CONFIG.cost_model == 'CHARGE_BACK': + if cost_model == 'CHARGE_BACK': req_cost = round( req_ratio * usage_per_type_date[res][date_key][KOA_CONFIG.db_billing_hourly_rate], KOA_CONFIG.db_round_decimals) - 
requests_export[res].append('{"stack":"%s","usage":%f,"date":"%s"}' - % (db, req_cost, date_key)) + + requests_export[res].append('{"stack":"%s","usage":%f,"date":"%s"}' % (db, req_cost, date_key)) if Rrd.get_date_group(now_gmtime, period) == date_key: - PROMETHEUS_PERIODIC_REQUESTS_EXPORTERS[period].labels(db, ResUsageType(res).name).set( - req_cost) + PROMETHEUS_PERIODIC_REQUESTS_EXPORTERS[period].labels(db, ResUsageType(res).name).set(req_cost) # noqa: E501 with open(str('%s/cpu_usage_period_%d.json' % (KOA_CONFIG.frontend_data_location, period)), 'w') as fd: fd.write('[' + ','.join(usage_export[0]) + ']') @@ -854,10 +1019,17 @@ def create_metrics_puller(): rrd = Rrd(db_files_location=KOA_CONFIG.db_location, dbname=KOA_CONFIG.db_non_allocatable) rrd.add_sample(timestamp_epoch=now_epoch, cpu_usage=cpu_non_allocatable, mem_usage=mem_non_allocatable) - # handle billing data + hourly_rate = -1.0 + if KOA_CONFIG.billing_hourly_rate > 0: + hourly_rate = KOA_CONFIG.billing_hourly_rate + elif k8s_usage.cloudCostAvailable is not None: + hourly_rate = k8s_usage.hourlyRate + rrd = Rrd(db_files_location=KOA_CONFIG.db_location, dbname=KOA_CONFIG.db_billing_hourly_rate) - rrd.add_sample(timestamp_epoch=now_epoch, cpu_usage=KOA_CONFIG.billing_hourly_rate, - mem_usage=KOA_CONFIG.billing_hourly_rate) + rrd.add_sample(timestamp_epoch=now_epoch, + cpu_usage=hourly_rate, + mem_usage=hourly_rate) # handle resource request and usage by pods for ns, ns_usage in k8s_usage.usageByNamespace.items(): @@ -886,7 +1058,7 @@ KOA_LOGGER.error("%s Exception in create_metrics_puller => %s", exception_type, traceback.format_exc()) -def dump_analytics(): +def dump_analytics(cost_model_by_user=None): try: export_interval = round(1.5 * KOA_CONFIG.polling_interval_sec) while True: @@ -905,22 +1077,29 @@ Rrd.dump_trend_analytics(ns_dbfiles, 'usage') Rrd.dump_trend_analytics(rf_dbfiles, 'rf') - Rrd.dump_histogram_analytics(dbfiles=ns_dbfiles, period=RrdPeriod.PERIOD_14_DAYS_SEC) - Rrd.dump_histogram_analytics(dbfiles=ns_dbfiles, period=RrdPeriod.PERIOD_YEAR_SEC) + + cost_model_selected = cost_model_by_user + if cost_model_by_user is None: + cost_model_selected = KOA_CONFIG.cost_model + elif cost_model_by_user not in ['CUMULATIVE_RATIO', 'RATIO', 'CHARGE_BACK']: + cost_model_selected = 'CUMULATIVE_RATIO' + KOA_LOGGER.warning("Unexpected cost model => %s (using default => CUMULATIVE_RATIO)", cost_model_by_user) + + Rrd.dump_histogram_analytics(dbfiles=ns_dbfiles, period=RrdPeriod.PERIOD_14_DAYS_SEC, cost_model=cost_model_selected) # noqa: E501 + Rrd.dump_histogram_analytics(dbfiles=ns_dbfiles, period=RrdPeriod.PERIOD_YEAR_SEC, cost_model=cost_model_selected) # noqa: E501 time.sleep(export_interval) except Exception as ex: exception_type = type(ex).__name__ KOA_LOGGER.error("%s Exception in dump_analytics => %s", exception_type, traceback.format_exc()) -# validating configs -if KOA_CONFIG.cost_model == 'CHARGE_BACK' and KOA_CONFIG.billing_hourly_rate <= 0.0: - KOA_LOGGER.fatal('invalid billing hourly rate for CHARGE_BACK cost allocation') - sys.exit(1) - if __name__ == '__main__': + if KOA_CONFIG.cost_model == 'CHARGE_BACK' and KOA_CONFIG.billing_hourly_rate <= 0.0: + KOA_LOGGER.warning('Unexpected hourly rate for CHARGE_BACK => %f', KOA_CONFIG.billing_hourly_rate) + parser = argparse.ArgumentParser(description='Kubernetes Opex Analytics Backend') - parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + KOA_CONFIG.version) + parser.add_argument('-v', '--version', action='version', version='%(prog)s {}'.format(KOA_CONFIG.version)) args = parser.parse_args() th_puller = threading.Thread(target=create_metrics_puller) th_exporter = threading.Thread(target=dump_analytics) @@ -928,6 +1107,6 @@ def dump_analytics(): th_exporter.start() if not KOA_CONFIG.enable_debug: - waitress_serve(wsgi_dispatcher, listen='0.0.0.0:5483') + waitress_serve(wsgi_dispatcher, listen='0.0.0.0:{}'.format(KOA_CONFIG.listener_port)) else: - app.run(host='0.0.0.0', port=5483) + app.run(host='0.0.0.0', port=KOA_CONFIG.listener_port) diff --git a/docs/built-in-dashboards-and-charts.md b/docs/built-in-dashboards-and-charts.md index 67e4342..c4b93ae 100644 --- a/docs/built-in-dashboards-and-charts.md +++ b/docs/built-in-dashboards-and-charts.md @@ -5,7 +5,7 @@ This section describes the built-in dashboards and charts provided by `kube-opex- - [Hourly Consolidated Usage Trends (7 days)](#hourly-consolidated-usage-trends-7-days) - [Hourly Usage/Requests Efficiency (7 days)](#hourly-usagerequests-efficiency-7-days) - [Daily Consumption Accounting (14 days)](#daily-consumption-accounting-14-days) - - [Monthly CPU and Memory Usage (12 months)](#monthly-cpu-and-memory-usage-12-months) + - [Monthly Consumption Accounting (12 months)](#monthly-consumption-accounting-12-months) - [Nodes' Occupation by Pods](#nodes-occupation-by-pods) - [Export Charts and Datasets (PNG, CSV, JSON)](#export-charts-and-datasets-png-csv-json) - [Dashboards and Visualization with Grafana](#dashboards-and-visualization-with-grafana) @@ -31,25 +31,29 @@ The date filter can be used to zoom out/in on a specific time range. These charts are based on data consolidated hourly thanks to sample metrics collected every five minutes from Kubernetes. ## Daily Consumption Accounting (14 days) -The daily accounting charts are provided per namespace for CPU and Memory resources and cover the last 14 days (2 weeks). +The daily accounting charts are provided per namespace for CPU and Memory resources and cover the last 14 days (2 weeks). -According to the [selected accounting model (](design-fundamentals.md#usage-accounting-models), the charts display one of the following metrics : +According to the [selected accounting model](design-fundamentals.md#usage-accounting-models), the charts display the following metrics. The charts and their backing data can be easily exported as an image or a CSV file (see [Export Charts and Datasets (PNG, CSV, JSON)](#export-charts-and-datasets-png-csv-json)). * Daily cumulative sum of actual hourly consumption per namespace. * Daily cumulative sum of the maximum between the actual hourly consumption and the requested capacities. -* Daily cumulative sum of actual hourly computedd from an actual cluster cost set statically based on a fixed hourly rate, or determinated dynamically from allocated resources on public clouds (nodes, storage, etc.). +* Daily cumulative sum of hourly cost computed from an actual cluster cost, set statically as a fixed hourly rate or determined dynamically from allocated resources on public clouds (nodes, storage, etc.). ![](../screenshots/sample-two-weeks-daily-usage.png) -## Monthly CPU and Memory Usage (12 months) -For the different namespaces discovered in the Kubernetes cluster, these charts show monthly cumulative usage for CPU and memory resources during the last 12 months. 
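To make the cloud-pricing lookup introduced in backend.py concrete, here is a self-contained sketch of the same Azure Retail Prices API query that `get_azure_price()` performs, reduced to the Linux pay-as-you-go case. The function name, the region, and the SKU in the usage comment are illustrative placeholders, and error handling is collapsed into `raise_for_status()` for brevity.

```python
import requests

def fetch_azure_linux_vm_price(region, sku_name):
    # Same public endpoint and filter that get_azure_price() builds; no API key is required.
    params = {
        "$filter": "armRegionName eq '{}' and skuName eq '{}' "
                   "and serviceName eq 'Virtual Machines'".format(region, sku_name)
    }
    resp = requests.get("https://prices.azure.com/api/retail/prices", params=params)
    resp.raise_for_status()
    for item in resp.json().get("Items", []):
        # Keep only pay-as-you-go (Consumption) prices and skip Windows SKUs,
        # mirroring the filtering logic in the diff above.
        if item.get("type") == "Consumption" and not item.get("productName", "").endswith("Windows"):
            return item.get("unitPrice")
    return None

# Example with placeholder values:
# fetch_azure_linux_vm_price("westeurope", "Standard_D2s_v3")
```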
+## Monthly Consumption Accounting (12 months) -![](../screenshots/sample-one-year-monthly-usage.png) +The monthly accounting charts are provided per namespace for CPU and Memory resources and cover the last 12 months (1 year). + +According to the [selected accounting model](design-fundamentals.md#usage-accounting-models), the charts display the following metrics. Each chart and its backing data can be easily exported as an image or a CSV file (see [Export Charts and Datasets (PNG, CSV, JSON)](#export-charts-and-datasets-png-csv-json)). -The charts are based on data consolidated hourly thanks to sample metrics collected every five minutes from Kubernetes. +* Monthly cumulative sum of actual hourly consumption per namespace. +* Monthly cumulative sum of the maximum between the actual hourly consumption and the requested capacities. +* Monthly cumulative sum of hourly cost computed from an actual cluster cost, set statically as a fixed hourly rate or determined dynamically from allocated resources on public clouds (nodes, storage, etc.). + +![](../screenshots/sample-one-year-monthly-usage.png) -Depending on the [selected accounting model](design-fundamentals.md#usage-accounting-models), the values on these charts can be actual costs (`CHARGE_BACK` model), cumulative usage (sum of hourly consolidated usage, `CUMULATIVE_RATIO` model), or a percentage of the global cluster usage (`CHARGE_BACK` model, `100%` means the total cluster capacity). ## Nodes' Occupation by Pods For each node discovered in the Kubernetes cluster, this dashboard section displays the CPU and the memory resources currently consumed by running pods. The data are refreshed every five minutes. @@ -66,4 +70,4 @@ Any chart provided by kube-opex-analytics can be exported, either as PNG image, ![](../screenshots/export-menu.png) # Dashboards and Visualization with Grafana -In addition or alternatively to the built-in dashboards, it's also possible to [use Grafana for visualization](./prometheus-exporter-grafana-dashboard.md) thanks to the Prometheus exporter natively enabled by `kube-opex-analytics`. \ No newline at end of file +In addition to, or as an alternative to, the built-in dashboards, it's also possible to [use Grafana for visualization](./prometheus-exporter-grafana-dashboard.md) thanks to the Prometheus exporter natively enabled by `kube-opex-analytics`. 
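To connect the accounting models described in these docs to the backend logic changed above, here is a minimal sketch of how a namespace's consolidated usage becomes the plotted value under each model. The helper name is hypothetical; the real computation lives in `dump_histogram_analytics()` and uses the billing rate stored in the `.billing-hourly-rate` RRD series.

```python
def allocate_cost(usage_value, total_usage, hourly_rate, cost_model):
    # CUMULATIVE_RATIO keeps the raw consolidated usage; RATIO normalizes it to a
    # percentage of the cluster total; CHARGE_BACK multiplies that share by the
    # cluster hourly cost (static rate or cloud-derived).
    if cost_model == 'RATIO':
        return 100.0 * usage_value / total_usage
    if cost_model == 'CHARGE_BACK':
        return (usage_value / total_usage) * hourly_rate
    return usage_value  # CUMULATIVE_RATIO

# e.g. a namespace using 2 cores of an 8-core cluster billed 1.6 $/hour:
# allocate_cost(2, 8, 1.6, 'CHARGE_BACK') -> 0.4 ($ per hour)
```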
diff --git a/manifests/helm/Chart.yaml b/manifests/helm/Chart.yaml index a0eb0c2..89ba6e0 100644 --- a/manifests/helm/Chart.yaml +++ b/manifests/helm/Chart.yaml @@ -2,6 +2,6 @@ apiVersion: v1 appVersion: "1.0" description: Helm chart for Kubernetes Opex Analtytics name: kube-opex-analytics -version: 22.02.3 +version: 22.12.0 diff --git a/manifests/helm/templates/deployment.yaml b/manifests/helm/templates/deployment.yaml index 3ebbd17..2f35156 100644 --- a/manifests/helm/templates/deployment.yaml +++ b/manifests/helm/templates/deployment.yaml @@ -44,6 +44,25 @@ spec: - name: {{ $key | quote }} value: {{ $val | quote }} {{- end }} + - name: "KOA_GOOGLE_API_KEY" + valueFrom: + secretKeyRef: + name: kube-opex-analytics-secrets + key: KOA_GOOGLE_API_KEY + {{- end }} + {{- if .Values.includedNamespaces }} + {{- if not .Values.envs }} + env: + {{- end }} + - name: KOA_INCLUDED_NAMESPACES + value: {{ join "," $.Values.includedNamespaces }} + {{- end }} + {{- if .Values.excludedNamespaces }} + {{- if not .Values.envs }} + env: + {{- end }} + - name: KOA_EXCLUDED_NAMESPACES + value: {{ join "," $.Values.excludedNamespaces }} {{- end }} ports: - name: http diff --git a/manifests/helm/values.yaml b/manifests/helm/values.yaml index 8e38e3a..200daee 100644 --- a/manifests/helm/values.yaml +++ b/manifests/helm/values.yaml @@ -10,12 +10,20 @@ envs: KOA_BILLING_CURRENCY_SYMBOL: $ KOA_K8S_API_VERIFY_SSL: true KOA_K8S_CACERT: /run/secrets/kubernetes.io/serviceaccount/ca.crt + KOA_INCLUDED_NAMESPACES: '' + KOA_EXCLUDED_NAMESPACES: '' dataVolume: persist: true capacity: 4Gi # storageClass: default +# overrides envs.KOA_INCLUDED_NAMESPACES +includedNamespaces: [] + +# overrides envs.KOA_EXCLUDED_NAMESPACES +excludedNamespaces: [] + prometheusOperator: enabled: false labels: diff --git a/manifests/kustomize/kustomization.yaml b/manifests/kustomize/kustomization.yaml index e6cd5ad..c5e82a4 100644 --- a/manifests/kustomize/kustomization.yaml +++ b/manifests/kustomize/kustomization.yaml @@ -6,10 +6,11 @@ namespace: kube-opex-analytics resources: - resources/kube-opex-analytics-rbac.yaml - resources/kube-opex-analytics-config.yaml + - resources/kube-opex-analytics-secrets.yaml - resources/kube-opex-analytics-sts.yaml - resources/kube-opex-analytics-service.yaml - resources/kube-opex-analytics-tests.yaml images: - name: kube-opex-analytics newName: rchakode/kube-opex-analytics - newTag: 22.02.3 + newTag: 22.12.0 diff --git a/manifests/kustomize/resources/kube-opex-analytics-config.yaml b/manifests/kustomize/resources/kube-opex-analytics-config.yaml index 7a54d18..24880f4 100644 --- a/manifests/kustomize/resources/kube-opex-analytics-config.yaml +++ b/manifests/kustomize/resources/kube-opex-analytics-config.yaml @@ -8,4 +8,6 @@ data: KOA_BILLING_CURRENCY_SYMBOL: '$' KOA_K8S_API_ENDPOINT: 'https://kubernetes.default' KOA_K8S_API_VERIFY_SSL: 'true' - KOA_K8S_CACERT: '/run/secrets/kubernetes.io/serviceaccount/ca.crt' \ No newline at end of file + KOA_K8S_CACERT: '/run/secrets/kubernetes.io/serviceaccount/ca.crt' + KOA_INCLUDED_NAMESPACES: '' + KOA_EXCLUDED_NAMESPACES: '' diff --git a/manifests/kustomize/resources/kube-opex-analytics-secrets.yaml b/manifests/kustomize/resources/kube-opex-analytics-secrets.yaml new file mode 100644 index 0000000..79c41d5 --- /dev/null +++ b/manifests/kustomize/resources/kube-opex-analytics-secrets.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: kube-opex-analytics-secrets +type: Opaque +data: + KOA_GOOGLE_API_KEY: 'S09BX0dPT0dMRV9BUElfS0VZ'
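A note on the Secret manifest above: values under `data:` in a Kubernetes Secret must be base64-encoded, and the committed value is only a placeholder (it decodes to the literal string `KOA_GOOGLE_API_KEY`). A quick sketch for preparing a real value; the key shown is hypothetical.

```python
import base64

# The committed placeholder decodes to the literal string 'KOA_GOOGLE_API_KEY':
print(base64.b64decode('S09BX0dPT0dMRV9BUElfS0VZ').decode())  # KOA_GOOGLE_API_KEY

# Encode an actual key (hypothetical value) before pasting it into the manifest:
print(base64.b64encode(b'my-real-google-api-key').decode())
```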
diff --git a/manifests/kustomize/resources/kube-opex-analytics-sts.yaml b/manifests/kustomize/resources/kube-opex-analytics-sts.yaml index d3c851e..154ce08 100644 --- a/manifests/kustomize/resources/kube-opex-analytics-sts.yaml +++ b/manifests/kustomize/resources/kube-opex-analytics-sts.yaml @@ -62,6 +62,11 @@ spec: configMapKeyRef: key: KOA_COST_MODEL name: kube-opex-analytics-config + - name: "KOA_GOOGLE_API_KEY" + valueFrom: + secretKeyRef: + key: KOA_GOOGLE_API_KEY + name: kube-opex-analytics-secrets ports: - name: http containerPort: 5483
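Since both the Helm chart and the Kustomize manifests now expose `KOA_INCLUDED_NAMESPACES` and `KOA_EXCLUDED_NAMESPACES`, it is worth noting how the backend parses them: spaces are treated as commas and empty items are dropped. A minimal sketch mirroring the list comprehensions in the `Config` class of backend.py (the helper name is invented for illustration):

```python
import os

def parse_namespace_filter(env_name):
    # Mirrors Config.included_namespaces / Config.excluded_namespaces in backend.py:
    # spaces act as separators just like commas, and empty items are discarded.
    raw = os.getenv(env_name, '')
    return [i for i in raw.replace(' ', ',').split(',') if i]

# 'kube-system kube-public' and 'kube-system,kube-public' both parse the same way:
os.environ['KOA_EXCLUDED_NAMESPACES'] = 'kube-system kube-public'
print(parse_namespace_filter('KOA_EXCLUDED_NAMESPACES'))  # ['kube-system', 'kube-public']
```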