From a724c10d5f1625c224a2340f49ca3bb632fef3f7 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Thu, 14 Nov 2024 17:32:39 +0100 Subject: [PATCH 1/6] fix usage analytics to properly register and not count if disabled --- .../localstack/utils/analytics/usage.py | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/localstack-core/localstack/utils/analytics/usage.py b/localstack-core/localstack/utils/analytics/usage.py index a1487f52578b8..633628b79f0fc 100644 --- a/localstack-core/localstack/utils/analytics/usage.py +++ b/localstack-core/localstack/utils/analytics/usage.py @@ -1,8 +1,10 @@ import datetime import math +from collections import Counter from typing import Any from localstack import config +from localstack.runtime import hooks from localstack.utils.analytics import get_session_id from localstack.utils.analytics.events import Event, EventMetadata from localstack.utils.analytics.publisher import AnalyticsClientPublisher @@ -11,6 +13,8 @@ collector_registry: dict[str, Any] = dict() # TODO: introduce some base abstraction for the counters after gather some initial experience working with it +# we could probably do intermediate aggregations over time to avoid unbounded counters for very long LS sessions +# for now, we can recommend to use config.DISABLE_EVENTS=1 class UsageSetCounter: @@ -29,19 +33,18 @@ class UsageSetCounter: namespace: str def __init__(self, namespace: str): - self.state = list() + self.enabled = not config.DISABLE_EVENTS + self.state = [] self.namespace = namespace collector_registry[namespace] = self def record(self, value: str): - self.state.append(value) + if self.enabled: + self.state.append(value) def aggregate(self) -> dict: - result = {} - for a in self.state: - result.setdefault(a, 0) - result[a] = result[a] + 1 - return result + result = Counter(self.state) + return dict(result) class UsageCounter: @@ -62,21 +65,24 @@ class UsageCounter: aggregations: list[str] def __init__(self, namespace: str, aggregations: list[str]): - self.state = list() + self.enabled = not config.DISABLE_EVENTS + self.state = [] self.namespace = namespace self.aggregations = aggregations collector_registry[namespace] = self def increment(self): - self.state.append(1) + if self.enabled: + self.state.append(1) def record_value(self, value: int | float): - self.state.append(value) + if self.enabled: + self.state.append(value) def aggregate(self) -> dict: result = {} - for aggregation in self.aggregations: - if self.state: + if self.state: + for aggregation in self.aggregations: match aggregation: case "sum": result[aggregation] = sum(self.state) @@ -101,13 +107,11 @@ def aggregate() -> dict: return aggregated_payload +@hooks.on_infra_shutdown(should_load=lambda: not config.DISABLE_EVENTS) def aggregate_and_send(): """ Aggregates data from all registered usage trackers and immediately sends the aggregated result to the analytics service. """ - if config.DISABLE_EVENTS: - return - metadata = EventMetadata( session_id=get_session_id(), client_time=str(datetime.datetime.now()), From f04328b8c1661953d68b5f42c5e2bb9a1a157412 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Fri, 15 Nov 2024 05:21:39 +0100 Subject: [PATCH 2/6] use default dict instead of list to reduce memory --- .../localstack/utils/analytics/usage.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/localstack-core/localstack/utils/analytics/usage.py b/localstack-core/localstack/utils/analytics/usage.py index 633628b79f0fc..e48eef9aa758e 100644 --- a/localstack-core/localstack/utils/analytics/usage.py +++ b/localstack-core/localstack/utils/analytics/usage.py @@ -1,6 +1,7 @@ import datetime import math -from collections import Counter +import threading +from collections import defaultdict from typing import Any from localstack import config @@ -29,22 +30,23 @@ class UsageSetCounter: my_feature_counter.aggregate() # returns {"python3.7": 1, "nodejs16.x": 2} """ - state: list[str] + state: dict[str, int] namespace: str + _lock: threading.RLock def __init__(self, namespace: str): - self.enabled = not config.DISABLE_EVENTS - self.state = [] + self.enabled = True + self.state = defaultdict(int) self.namespace = namespace - collector_registry[namespace] = self + self._lock = threading.RLock() def record(self, value: str): if self.enabled: - self.state.append(value) + with self._lock: + self.state[value] += 1 def aggregate(self) -> dict: - result = Counter(self.state) - return dict(result) + return self.state class UsageCounter: @@ -72,6 +74,8 @@ def __init__(self, namespace: str, aggregations: list[str]): collector_registry[namespace] = self def increment(self): + # TODO: we should instead have different underlying datastructures to store the state, and have no-op operations + # when config.DISABLE_EVENTS is set if self.enabled: self.state.append(1) @@ -94,7 +98,9 @@ def aggregate(self) -> dict: result[aggregation] = sum(self.state) / len(self.state) case "median": median_index = math.floor(len(self.state) / 2) - result[aggregation] = self.state[median_index] + result[aggregation] = sorted(self.state)[median_index] + case "count": + result[aggregation] = len(self.state) case _: raise Exception(f"Unsupported aggregation: {aggregation}") return result From 4df164b6b88979608317deff50939adc0b7f89ac Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Fri, 15 Nov 2024 05:47:40 +0100 Subject: [PATCH 3/6] use itertools.count --- localstack-core/localstack/utils/analytics/usage.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/localstack-core/localstack/utils/analytics/usage.py b/localstack-core/localstack/utils/analytics/usage.py index e48eef9aa758e..35d27dee3a19d 100644 --- a/localstack-core/localstack/utils/analytics/usage.py +++ b/localstack-core/localstack/utils/analytics/usage.py @@ -1,7 +1,7 @@ import datetime import math -import threading from collections import defaultdict +from itertools import count from typing import Any from localstack import config @@ -31,19 +31,18 @@ class UsageSetCounter: """ state: dict[str, int] + _counter: dict[str, count] namespace: str - _lock: threading.RLock def __init__(self, namespace: str): - self.enabled = True - self.state = defaultdict(int) + self.enabled = not config.DISABLE_EVENTS + self.state = {} + self._counter = defaultdict(count) self.namespace = namespace - self._lock = threading.RLock() def record(self, value: str): if self.enabled: - with self._lock: - self.state[value] += 1 + self.state[value] = next(self._counter[value]) def aggregate(self) -> dict: return self.state From 533d9ddd745fd01fc1d33cdef0480e83c5c7c564 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Mon, 18 Nov 2024 10:19:13 +0100 Subject: [PATCH 4/6] remove conditional loading --- localstack-core/localstack/utils/analytics/usage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/localstack-core/localstack/utils/analytics/usage.py b/localstack-core/localstack/utils/analytics/usage.py index 35d27dee3a19d..d2301d646541b 100644 --- a/localstack-core/localstack/utils/analytics/usage.py +++ b/localstack-core/localstack/utils/analytics/usage.py @@ -112,11 +112,14 @@ def aggregate() -> dict: return aggregated_payload -@hooks.on_infra_shutdown(should_load=lambda: not config.DISABLE_EVENTS) +@hooks.on_infra_shutdown() def aggregate_and_send(): """ Aggregates data from all registered usage trackers and immediately sends the aggregated result to the analytics service. """ + if not config.DISABLE_EVENTS: + return + metadata = EventMetadata( session_id=get_session_id(), client_time=str(datetime.datetime.now()), From 27c7c5294771954f9176ebd3f785888843bb5f37 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Mon, 18 Nov 2024 10:27:22 +0100 Subject: [PATCH 5/6] fix counting start --- localstack-core/localstack/utils/analytics/usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localstack-core/localstack/utils/analytics/usage.py b/localstack-core/localstack/utils/analytics/usage.py index d2301d646541b..6f1d745620b83 100644 --- a/localstack-core/localstack/utils/analytics/usage.py +++ b/localstack-core/localstack/utils/analytics/usage.py @@ -37,7 +37,7 @@ class UsageSetCounter: def __init__(self, namespace: str): self.enabled = not config.DISABLE_EVENTS self.state = {} - self._counter = defaultdict(count) + self._counter = defaultdict(lambda: count(1)) self.namespace = namespace def record(self, value: str): From 41fd4895701fbd49b90c4d603cf11aad9a6ef361 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Mon, 18 Nov 2024 10:38:15 +0100 Subject: [PATCH 6/6] oopsie --- localstack-core/localstack/utils/analytics/usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localstack-core/localstack/utils/analytics/usage.py b/localstack-core/localstack/utils/analytics/usage.py index 6f1d745620b83..e28657d793a13 100644 --- a/localstack-core/localstack/utils/analytics/usage.py +++ b/localstack-core/localstack/utils/analytics/usage.py @@ -117,7 +117,7 @@ def aggregate_and_send(): """ Aggregates data from all registered usage trackers and immediately sends the aggregated result to the analytics service. """ - if not config.DISABLE_EVENTS: + if config.DISABLE_EVENTS: return metadata = EventMetadata(