chore: emit strands metrics (#248) · 0xLiam-bit/sdk-python@e693738 · GitHub
[go: up one dir, main page]

Skip to content

Commit e693738

Browse files
authored
chore: emit strands metrics (strands-agents#248)
1 parent 76ee1ad commit e693738

File tree

5 files changed

+149
-28
lines changed

5 files changed

+149
-28
lines changed

src/strands/event_loop/event_loop.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def event_loop_cycle(
8888
kwargs["event_loop_cycle_id"] = uuid.uuid4()
8989

9090
event_loop_metrics: EventLoopMetrics = kwargs.get("event_loop_metrics", EventLoopMetrics())
91-
9291
# Initialize state and get cycle trace
9392
if "request_state" not in kwargs:
9493
kwargs["request_state"] = {}
95-
cycle_start_time, cycle_trace = event_loop_metrics.start_cycle()
94+
attributes = {"event_loop_cycle_id": str(kwargs.get("event_loop_cycle_id"))}
95+
cycle_start_time, cycle_trace = event_loop_metrics.start_cycle(attributes=attributes)
9696
kwargs["event_loop_cycle_trace"] = cycle_trace
9797

9898
callback_handler(start=True)
@@ -211,7 +211,7 @@ def event_loop_cycle(
211211
)
212212

213213
# End the cycle and return results
214-
event_loop_metrics.end_cycle(cycle_start_time, cycle_trace)
214+
event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes)
215215
if cycle_span:
216216
tracer.end_event_loop_cycle_span(
217217
span=cycle_span,
@@ -344,7 +344,6 @@ def _handle_tool_execution(
344344

345345
if not tool_uses:
346346
return stop_reason, message, event_loop_metrics, kwargs["request_state"]
347-
348347
tool_handler_process = partial(
349348
tool_handler.process,
350349
messages=messages,

src/strands/telemetry/metrics.py

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
88

99
import opentelemetry.metrics as metrics_api
10-
from opentelemetry.metrics import Counter, Meter
10+
from opentelemetry.metrics import Counter, Histogram, Meter
1111

1212
from ..telemetry import metrics_constants as constants
1313
from ..types.content import Message
@@ -121,22 +121,34 @@ class ToolMetrics:
121121
error_count: int = 0
122122
total_time: float = 0.0
123123

124-
def add_call(self, tool: ToolUse, duration: float, success: bool) -> None:
124+
def add_call(
125+
self,
126+
tool: ToolUse,
127+
duration: float,
128+
success: bool,
129+
metrics_client: "MetricsClient",
130+
attributes: Optional[Dict[str, Any]] = None,
131+
) -> None:
125132
"""Record a new tool call with its outcome.
126133
127134
Args:
128135
tool: The tool that was called.
129136
duration: How long the call took in seconds.
130137
success: Whether the call was successful.
138+
metrics_client: The metrics client for recording the metrics.
139+
attributes: attributes of the metrics.
131140
"""
132141
self.tool = tool # Update with latest tool state
133142
self.call_count += 1
134143
self.total_time += duration
135-
144+
metrics_client.tool_call_count.add(1, attributes=attributes)
145+
metrics_client.tool_duration.record(duration, attributes=attributes)
136146
if success:
137147
self.success_count += 1
148+
metrics_client.tool_success_count.add(1, attributes=attributes)
138149
else:
139150
self.error_count += 1
151+
metrics_client.tool_error_count.add(1, attributes=attributes)
140152

141153

142154
@dataclass
@@ -159,32 +171,53 @@ class EventLoopMetrics:
159171
accumulated_usage: Usage = field(default_factory=lambda: Usage(inputTokens=0, outputTokens=0, totalTokens=0))
160172
accumulated_metrics: Metrics = field(default_factory=lambda: Metrics(latencyMs=0))
161173

162-
def start_cycle(self) -> Tuple[float, Trace]:
174+
@property
175+
def _metrics_client(self) -> "MetricsClient":
176+
"""Get the singleton MetricsClient instance."""
177+
return MetricsClient()
178+
179+
def start_cycle(
180+
self,
181+
attributes: Optional[Dict[str, Any]] = None,
182+
) -> Tuple[float, Trace]:
163183
"""Start a new event loop cycle and create a trace for it.
164184
185+
Args:
186+
attributes: attributes of the metrics.
187+
165188
Returns:
166189
A tuple containing the start time and the cycle trace object.
167190
"""
191+
self._metrics_client.event_loop_cycle_count.add(1, attributes=attributes)
192+
self._metrics_client.event_loop_start_cycle.add(1, attributes=attributes)
168193
self.cycle_count += 1
169194
start_time = time.time()
170195
cycle_trace = Trace(f"Cycle {self.cycle_count}", start_time=start_time)
171196
self.traces.append(cycle_trace)
172197
return start_time, cycle_trace
173198

174-
def end_cycle(self, start_time: float, cycle_trace: Trace) -> None:
199+
def end_cycle(self, start_time: float, cycle_trace: Trace, attributes: Optional[Dict[str, Any]] = None) -> None:
175200
"""End the current event loop cycle and record its duration.
176201
177202
Args:
178203
start_time: The timestamp when the cycle started.
179204
cycle_trace: The trace object for this cycle.
205+
attributes: attributes of the metrics.
180206
"""
207+
self._metrics_client.event_loop_end_cycle.add(1, attributes)
181208
end_time = time.time()
182209
duration = end_time - start_time
210+
self._metrics_client.event_loop_cycle_duration.record(duration, attributes)
183211
self.cycle_durations.append(duration)
184212
cycle_trace.end(end_time)
185213

186214
def add_tool_usage(
187-
self, tool: ToolUse, duration: float, tool_trace: Trace, success: bool, message: Message
215+
self,
216+
tool: ToolUse,
217+
duration: float,
218+
tool_trace: Trace,
219+
success: bool,
220+
message: Message,
188221
) -> None:
189222
"""Record metrics for a tool invocation.
190223
@@ -207,8 +240,16 @@ def add_tool_usage(
207240
tool_trace.raw_name = f"{tool_name} - {tool_use_id}"
208241
tool_trace.add_message(message)
209242

210-
self.tool_metrics.setdefault(tool_name, ToolMetrics(tool)).add_call(tool, duration, success)
211-
243+
self.tool_metrics.setdefault(tool_name, ToolMetrics(tool)).add_call(
244+
tool,
245+
duration,
246+
success,
247+
self._metrics_client,
248+
attributes={
249+
"tool_name": tool_name,
250+
"tool_use_id": tool_use_id,
251+
},
252+
)
212253
tool_trace.end()
213254

214255
def update_usage(self, usage: Usage) -> None:
@@ -217,6 +258,8 @@ def update_usage(self, usage: Usage) -> None:
217258
Args:
218259
usage: The usage data to add to the accumulated totals.
219260
"""
261+
self._metrics_client.event_loop_input_tokens.record(usage["inputTokens"])
262+
self._metrics_client.event_loop_output_tokens.record(usage["outputTokens"])
220263
self.accumulated_usage["inputTokens"] += usage["inputTokens"]
221264
self.accumulated_usage["outputTokens"] += usage["outputTokens"]
222265
self.accumulated_usage["totalTokens"] += usage["totalTokens"]
@@ -227,6 +270,7 @@ def update_metrics(self, metrics: Metrics) -> None:
227270
Args:
228271
metrics: The metrics data to add to the accumulated totals.
229272
"""
273+
self._metrics_client.event_loop_latency.record(metrics["latencyMs"])
230274
self.accumulated_metrics["latencyMs"] += metrics["latencyMs"]
231275

232276
def get_summary(self) -> Dict[str, Any]:
@@ -370,7 +414,18 @@ class MetricsClient:
370414

371415
_instance: Optional["MetricsClient"] = None
372416
meter: Meter
373-
strands_agent_invocation_count: Counter
417+
event_loop_cycle_count: Counter
418+
event_loop_start_cycle: Counter
419+
event_loop_end_cycle: Counter
420+
event_loop_cycle_duration: Histogram
421+
event_loop_latency: Histogram
422+
event_loop_input_tokens: Histogram
423+
event_loop_output_tokens: Histogram
424+
425+
tool_call_count: Counter
426+
tool_success_count: Counter
427+
tool_error_count: Counter
428+
tool_duration: Histogram
374429

375430
def __new__(cls) -> "MetricsClient":
376431
"""Create or return the singleton instance of MetricsClient.
@@ -398,6 +453,24 @@ def __init__(self) -> None:
398453

399454
def create_instruments(self) -> None:
400455
"""Create and initialize all OpenTelemetry metric instruments."""
401-
self.strands_agent_invocation_count = self.meter.create_counter(
402-
name=constants.STRANDS_AGENT_INVOCATION_COUNT, unit="Count"
456+
self.event_loop_cycle_count = self.meter.create_counter(
457+
name=constants.STRANDS_EVENT_LOOP_CYCLE_COUNT, unit="Count"
458+
)
459+
self.event_loop_start_cycle = self.meter.create_counter(
460+
name=constants.STRANDS_EVENT_LOOP_START_CYCLE, unit="Count"
461+
)
462+
self.event_loop_end_cycle = self.meter.create_counter(name=constants.STRANDS_EVENT_LOOP_END_CYCLE, unit="Count")
463+
self.event_loop_cycle_duration = self.meter.create_histogram(
464+
name=constants.STRANDS_EVENT_LOOP_CYCLE_DURATION, unit="s"
465+
)
466+
self.event_loop_latency = self.meter.create_histogram(name=constants.STRANDS_EVENT_LOOP_LATENCY, unit="ms")
467+
self.tool_call_count = self.meter.create_counter(name=constants.STRANDS_TOOL_CALL_COUNT, unit="Count")
468+
self.tool_success_count = self.meter.create_counter(name=constants.STRANDS_TOOL_SUCCESS_COUNT, unit="Count")
469+
self.tool_error_count = self.meter.create_counter(name=constants.STRANDS_TOOL_ERROR_COUNT, unit="Count")
470+
self.tool_duration = self.meter.create_histogram(name=constants.STRANDS_TOOL_DURATION, unit="s")
471+
self.event_loop_input_tokens = self.meter.create_histogram(
472+
name=constants.STRANDS_EVENT_LOOP_INPUT_TOKENS, unit="token"
473+
)
474+
self.event_loop_output_tokens = self.meter.create_histogram(
475+
name=constants.STRANDS_EVENT_LOOP_OUTPUT_TOKENS, unit="token"
403476
)
src/strands/telemetry/metrics_constants.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1-
"""Metrics that are emitted in Strands-Agent."""
1+
"""Metrics that are emitted in Strands-Agents."""
22

3-
STRANDS_AGENT_INVOCATION_COUNT = "strands.agent.invocation_count"
3+
STRANDS_EVENT_LOOP_CYCLE_COUNT = "strands.event_loop.cycle_count"
4+
STRANDS_EVENT_LOOP_START_CYCLE = "strands.event_loop.start_cycle"
5+
STRANDS_EVENT_LOOP_END_CYCLE = "strands.event_loop.end_cycle"
6+
STRANDS_TOOL_CALL_COUNT = "strands.tool.call_count"
7+
STRANDS_TOOL_SUCCESS_COUNT = "strands.tool.success_count"
8+
STRANDS_TOOL_ERROR_COUNT = "strands.tool.error_count"
9+
10+
# Histograms
11+
STRANDS_EVENT_LOOP_LATENCY = "strands.event_loop.latency"
12+
STRANDS_TOOL_DURATION = "strands.tool.duration"
13+
STRANDS_EVENT_LOOP_CYCLE_DURATION = "strands.event_loop.cycle_duration"
14+
STRANDS_EVENT_LOOP_INPUT_TOKENS = "strands.event_loop.input.tokens"
15+
STRANDS_EVENT_LOOP_OUTPUT_TOKENS = "strands.event_loop.output.tokens"

tests/strands/telemetry/test_metrics.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,19 @@ def test_trace_end(mock_time, end_time, trace):
124124
@pytest.fixture
125125
def mock_get_meter_provider():
126126
with mock.patch("strands.telemetry.metrics.metrics_api.get_meter_provider") as mock_get_meter_provider:
127+
MetricsClient._instance = None
127128
meter_provider_mock = mock.MagicMock(spec=MeterProvider)
128-
mock_get_meter_provider.return_value = meter_provider_mock
129129

130130
mock_meter = mock.MagicMock()
131+
mock_create_counter = mock.MagicMock()
132+
mock_meter.create_counter.return_value = mock_create_counter
133+
134+
mock_create_histogram = mock.MagicMock()
135+
mock_meter.create_histogram.return_value = mock_create_histogram
131136
meter_provider_mock.get_meter.return_value = mock_meter
132137

138+
mock_get_meter_provider.return_value = meter_provider_mock
139+
133140
yield mock_get_meter_provider
134141

135142

@@ -190,11 +197,14 @@ def test_trace_to_dict(trace):
190197

191198

192199
@pytest.mark.parametrize("success", [True, False])
193-
def test_tool_metrics_add_call(success, tool, tool_metrics):
200+
def test_tool_metrics_add_call(success, tool, tool_metrics, mock_get_meter_provider):
194201
tool = dict(tool, **{"name": "updated"})
195202
duration = 1
203+
metrics_client = MetricsClient()
204+
205+
attributes = {"foo": "bar"}
196206

197-
tool_metrics.add_call(tool, duration, success)
207+
tool_metrics.add_call(tool, duration, success, metrics_client, attributes=attributes)
198208

199209
tru_attrs = dataclasses.asdict(tool_metrics)
200210
exp_attrs = {
@@ -205,12 +215,17 @@ def test_tool_metrics_add_call(success, tool, tool_metrics):
205215
"total_time": duration,
206216
}
207217

218+
mock_get_meter_provider.return_value.get_meter.assert_called()
219+
metrics_client.tool_call_count.add.assert_called_with(1, attributes=attributes)
220+
metrics_client.tool_duration.record.assert_called_with(duration, attributes=attributes)
221+
if success:
222+
metrics_client.tool_success_count.add.assert_called_with(1, attributes=attributes)
208223
assert tru_attrs == exp_attrs
209224

210225

211226
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
212227
@unittest.mock.patch.object(strands.telemetry.metrics.uuid, "uuid4")
213-
def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metrics):
228+
def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metrics, mock_get_meter_provider):
214229
mock_time.return_value = 1
215230
mock_uuid4.return_value = "i1"
216231

@@ -220,6 +235,8 @@ def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metric
220235
tru_attrs = {"cycle_count": event_loop_metrics.cycle_count, "traces": event_loop_metrics.traces}
221236
exp_attrs = {"cycle_count": 1, "traces": [tru_cycle_trace]}
222237

238+
mock_get_meter_provider.return_value.get_meter.assert_called()
239+
event_loop_metrics._metrics_client.event_loop_cycle_count.add.assert_called()
223240
assert (
224241
tru_start_time == exp_start_time
225242
and tru_cycle_trace.to_dict() == exp_cycle_trace.to_dict()
@@ -228,10 +245,11 @@ def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metric
228245

229246

230247
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
231-
def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics):
248+
def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics, mock_get_meter_provider):
232249
mock_time.return_value = 1
233250

234-
event_loop_metrics.end_cycle(start_time=0, cycle_trace=trace)
251+
attributes = {"foo": "bar"}
252+
event_loop_metrics.end_cycle(start_time=0, cycle_trace=trace, attributes=attributes)
235253

236254
tru_cycle_durations = event_loop_metrics.cycle_durations
237255
exp_cycle_durations = [1]
@@ -243,17 +261,23 @@ def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics):
243261

244262
assert tru_trace_end_time == exp_trace_end_time
245263

264+
mock_get_meter_provider.return_value.get_meter.assert_called()
265+
metrics_client = event_loop_metrics._metrics_client
266+
metrics_client.event_loop_end_cycle.add.assert_called_with(1, attributes)
267+
metrics_client.event_loop_cycle_duration.record.assert_called()
268+
246269

247270
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
248-
def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_metrics):
271+
def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_metrics, mock_get_meter_provider):
249272
mock_time.return_value = 1
250-
251273
duration = 1
252274
success = True
253275
message = {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "tool_name": "tool1"}}]}
254276

255277
event_loop_metrics.add_tool_usage(tool, duration, trace, success, message)
256278

279+
mock_get_meter_provider.return_value.get_meter.assert_called()
280+
257281
tru_event_loop_metrics_attrs = {"tool_metrics": event_loop_metrics.tool_metrics}
258282
exp_event_loop_metrics_attrs = {
259283
"tool_metrics": {
@@ -286,7 +310,7 @@ def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_me
286310
assert tru_trace_attrs == exp_trace_attrs
287311

288312

289-
def test_event_loop_metrics_update_usage(usage, event_loop_metrics):
313+
def test_event_loop_metrics_update_usage(usage, event_loop_metrics, mock_get_meter_provider):
290314
for _ in range(3):
291315
event_loop_metrics.update_usage(usage)
292316

@@ -298,9 +322,13 @@ def test_event_loop_metrics_update_usage(usage, event_loop_metrics):
298322
)
299323

300324
assert tru_usage == exp_usage
325+
mock_get_meter_provider.return_value.get_meter.assert_called()
326+
metrics_client = event_loop_metrics._metrics_client
327+
metrics_client.event_loop_input_tokens.record.assert_called()
328+
metrics_client.event_loop_output_tokens.record.assert_called()
301329

302330

303-
def test_event_loop_metrics_update_metrics(metrics, event_loop_metrics):
331+
def test_event_loop_metrics_update_metrics(metrics, event_loop_metrics, mock_get_meter_provider):
304332
for _ in range(3):
305333
event_loop_metrics.update_metrics(metrics)
306334

@@ -310,9 +338,11 @@ def test_event_loop_metrics_update_metrics(metrics, event_loop_metrics):
310338
)
311339

312340
assert tru_metrics == exp_metrics
341+
mock_get_meter_provider.return_value.get_meter.assert_called()
342+
event_loop_metrics._metrics_client.event_loop_latency.record.assert_called_with(1)
313343

314344

315-
def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics):
345+
def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics, mock_get_meter_provider):
316346
duration = 1
317347
success = True
318348
message = {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "tool_name": "tool1"}}]}

tests/strands/tools/test_executor.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ def tool_uses(request, tool_use):
4343
return request.param if hasattr(request, "param") else [tool_use]
4444

4545

46+
@pytest.fixture
47+
def mock_metrics_client():
48+
with unittest.mock.patch("strands.telemetry.MetricsClient") as mock_metrics_client:
49+
yield mock_metrics_client
50+
51+
4652
@pytest.fixture
4753
def event_loop_metrics():
4854
return strands.telemetry.metrics.EventLoopMetrics()
@@ -303,6 +309,7 @@ def test_run_tools_creates_and_ends_span_on_success(
303309
mock_get_tracer,
304310
tool_handler,
305311
tool_uses,
312+
mock_metrics_client,
306313
event_loop_metrics,
307314
request_state,
308315
invalid_tool_use_ids,

0 commit comments

Comments
 (0)
0