chore: emit strands metrics · strands-agents/sdk-python@f361135 · GitHub
[go: up one dir, main page]

Skip to content

Commit f361135

Browse files
committed
chore: emit strands metrics
1 parent 4d7bb98 commit f361135

File tree

5 files changed

+128
-26
lines changed

5 files changed

+128
-26
lines changed

src/strands/event_loop/event_loop.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from functools import partial
1414
from typing import Any, Callable, Dict, List, Optional, Tuple, cast
1515

16+
from ..telemetry import MetricsClient
1617
from ..telemetry.metrics import EventLoopMetrics, Trace
1718
from ..telemetry.tracer import get_tracer
1819
from ..tools.executor import run_tools, validate_and_prepare_tools
@@ -105,10 +106,14 @@ def event_loop_cycle(
105106
kwargs["event_loop_cycle_id"] = uuid.uuid4()
106107

107108
event_loop_metrics: EventLoopMetrics = kwargs.get("event_loop_metrics", EventLoopMetrics())
108-
109+
metrics_client = MetricsClient()
109110
# Initialize state and get cycle trace
110111
kwargs = initialize_state(**kwargs)
111-
cycle_start_time, cycle_trace = event_loop_metrics.start_cycle()
112+
113+
attributes = {"event_loop_cycle_id": str(kwargs.get("event_loop_cycle_id"))}
114+
cycle_start_time, cycle_trace = event_loop_metrics.start_cycle(metrics_client)
115+
metrics_client.event_loop_cycle_count.add(1, attributes=attributes)
116+
metrics_client.event_loop_start_cycle.add(1, attributes=attributes)
112117
kwargs["event_loop_cycle_trace"] = cycle_trace
113118

114119
callback_handler(start=True)
@@ -227,7 +232,7 @@ def event_loop_cycle(
227232
)
228233

229234
# End the cycle and return results
230-
event_loop_metrics.end_cycle(cycle_start_time, cycle_trace)
235+
event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, metrics_client)
231236
if cycle_span:
232237
tracer.end_event_loop_cycle_span(
233238
span=cycle_span,
@@ -380,7 +385,7 @@ def _handle_tool_execution(
380385

381386
if not tool_uses:
382387
return stop_reason, message, event_loop_metrics, kwargs["request_state"]
383-
388+
metrics_client = MetricsClient()
384389
tool_handler_process = partial(
385390
tool_handler.process,
386391
messages=messages,
@@ -418,7 +423,7 @@ def _handle_tool_execution(
418423
tracer.end_event_loop_cycle_span(span=cycle_span, message=message, tool_result_message=tool_result_message)
419424

420425
if kwargs["request_state"].get("stop_event_loop", False):
421-
event_loop_metrics.end_cycle(cycle_start_time, cycle_trace)
426+
event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, metrics_client)
422427
return stop_reason, message, event_loop_metrics, kwargs["request_state"]
423428

424429
return recurse_event_loop(

src/strands/telemetry/metrics.py

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
88

99
import opentelemetry.metrics as metrics_api
10-
from opentelemetry.metrics import Counter, Meter
10+
from opentelemetry.metrics import Counter, Histogram, Meter
1111

1212
from ..telemetry import metrics_constants as constants
1313
from ..types.content import Message
@@ -121,22 +121,34 @@ class ToolMetrics:
121121
error_count: int = 0
122122
total_time: float = 0.0
123123

124-
def add_call(self, tool: ToolUse, duration: float, success: bool) -> None:
124+
def add_call(
125+
self,
126+
tool: ToolUse,
127+
duration: float,
128+
success: bool,
129+
metrics_client: "MetricsClient",
130+
attributes: Optional[Dict[str, Any]] = None,
131+
) -> None:
125132
"""Record a new tool call with its outcome.
126133
127134
Args:
128135
tool: The tool that was called.
129136
duration: How long the call took in seconds.
130137
success: Whether the call was successful.
138+
metrics_client: The metrics client for recording the metrics.
139+
attributes: attributes of the metrics.
131140
"""
132141
self.tool = tool # Update with latest tool state
133142
self.call_count += 1
134143
self.total_time += duration
135-
144+
metrics_client.tool_call_count.add(1, attributes=attributes)
145+
metrics_client.tool_duration.record(duration, attributes=attributes)
136146
if success:
137147
self.success_count += 1
148+
metrics_client.tool_success_count.add(1, attributes=attributes)
138149
else:
139150
self.error_count += 1
151+
metrics_client.tool_error_count.add(1, attributes=attributes)
140152

141153

142154
@dataclass
@@ -159,32 +171,42 @@ class EventLoopMetrics:
159171
accumulated_usage: Usage = field(default_factory=lambda: Usage(inputTokens=0, outputTokens=0, totalTokens=0))
160172
accumulated_metrics: Metrics = field(default_factory=lambda: Metrics(latencyMs=0))
161173

162-
def start_cycle(self) -> Tuple[float, Trace]:
174+
def start_cycle(self, metrics_client: "MetricsClient") -> Tuple[float, Trace]:
163175
"""Start a new event loop cycle and create a trace for it.
164176
165177
Returns:
166178
A tuple containing the start time and the cycle trace object.
167179
"""
180+
metrics_client.event_loop_cycle_count.add(1)
168181
self.cycle_count += 1
169182
start_time = time.time()
170183
cycle_trace = Trace(f"Cycle {self.cycle_count}", start_time=start_time)
171184
self.traces.append(cycle_trace)
172185
return start_time, cycle_trace
173186

174-
def end_cycle(self, start_time: float, cycle_trace: Trace) -> None:
187+
def end_cycle(self, start_time: float, cycle_trace: Trace, metrics_client: "MetricsClient") -> None:
175188
"""End the current event loop cycle and record its duration.
176189
177190
Args:
178191
start_time: The timestamp when the cycle started.
179192
cycle_trace: The trace object for this cycle.
193+
metrics_client: The metrics client for recording the metrics.
180194
"""
195+
metrics_client.event_loop_end_cycle.add(1)
181196
end_time = time.time()
182197
duration = end_time - start_time
198+
metrics_client.event_loop_cycle_duration.record(duration)
183199
self.cycle_durations.append(duration)
184200
cycle_trace.end(end_time)
185201

186202
def add_tool_usage(
187-
self, tool: ToolUse, duration: float, tool_trace: Trace, success: bool, message: Message
203+
self,
204+
tool: ToolUse,
205+
duration: float,
206+
tool_trace: Trace,
207+
success: bool,
208+
message: Message,
209+
metrics_client: "MetricsClient",
188210
) -> None:
189211
"""Record metrics for a tool invocation.
190212
@@ -194,6 +216,7 @@ def add_tool_usage(
194216
tool_trace: The trace object for this tool call.
195217
success: Whether the tool call was successful.
196218
message: The message associated with the tool call.
219+
metrics_client: The metrics client for recording the metrics.
197220
"""
198221
tool_name = tool.get("name", "unknown_tool")
199222
tool_use_id = tool.get("toolUseId", "unknown")
@@ -207,8 +230,16 @@ def add_tool_usage(
207230
tool_trace.raw_name = f"{tool_name} - {tool_use_id}"
208231
tool_trace.add_message(message)
209232

210-
self.tool_metrics.setdefault(tool_name, ToolMetrics(tool)).add_call(tool, duration, success)
211-
233+
self.tool_metrics.setdefault(tool_name, ToolMetrics(tool)).add_call(
234+
tool,
235+
duration,
236+
success,
237+
metrics_client,
238+
attributes={
239+
"tool_name": tool_name,
240+
"tool_use_id": tool_use_id,
241+
},
242+
)
212243
tool_trace.end()
213244

214245
def update_usage(self, usage: Usage) -> None:
@@ -217,6 +248,7 @@ def update_usage(self, usage: Usage) -> None:
217248
Args:
218249
usage: The usage data to add to the accumulated totals.
219250
"""
251+
# metrics_client.token_usage.add(usage["totalTokens"])
220252
self.accumulated_usage["inputTokens"] += usage["inputTokens"]
221253
self.accumulated_usage["outputTokens"] += usage["outputTokens"]
222254
self.accumulated_usage["totalTokens"] += usage["totalTokens"]
@@ -371,6 +403,14 @@ class MetricsClient:
371403
_instance: Optional["MetricsClient"] = None
372404
meter: Meter
373405
strands_agent_invocation_count: Counter
406+
event_loop_cycle_count: Counter
407+
event_loop_start_cycle: Counter
408+
event_loop_end_cycle: Counter
409+
event_loop_cycle_duration: Histogram
410+
tool_call_count: Counter
411+
tool_success_count: Counter
412+
tool_error_count: Counter
413+
tool_duration: Histogram
374414

375415
def __new__(cls) -> "MetricsClient":
376416
"""Create or return the singleton instance of MetricsClient.
@@ -401,3 +441,21 @@ def create_instruments(self) -> None:
401441
self.strands_agent_invocation_count = self.meter.create_counter(
402442
name=constants.STRANDS_AGENT_INVOCATION_COUNT, unit="Count"
403443
)
444+
self.event_loop_cycle_count = self.meter.create_counter(
445+
name=constants.STRANDS_AGENT_EVENT_LOOP_CYCLE_COUNT, unit="Count"
446+
)
447+
self.event_loop_start_cycle = self.meter.create_counter(
448+
name=constants.STRANDS_AGENT_EVENT_LOOP_START_CYCLE, unit="Count"
449+
)
450+
self.event_loop_end_cycle = self.meter.create_counter(
451+
name=constants.STRANDS_AGENT_EVENT_LOOP_END_CYCLE, unit="Count"
452+
)
453+
self.event_loop_cycle_duration = self.meter.create_histogram(
454+
name=constants.STRANDS_AGENT_EVENT_LOOP_CYCLE_DURATION, unit="s"
455+
)
456+
self.tool_call_count = self.meter.create_counter(name=constants.STRANDS_AGENT_TOOL_CALL_COUNT, unit="Count")
457+
self.tool_success_count = self.meter.create_counter(
458+
name=constants.STRANDS_AGENT_TOOL_SUCCESS_COUNT, unit="Count"
459+
)
460+
self.tool_error_count = self.meter.create_counter(name=constants.STRANDS_AGENT_TOOL_ERROR_COUNT, unit="Count")
461+
self.tool_duration = self.meter.create_histogram(name=constants.STRANDS_AGENT_TOOL_DURATION, unit="s")
src/strands/telemetry/metrics_constants.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
"""Metrics that are emitted in Strands-Agent."""
22

33
STRANDS_AGENT_INVOCATION_COUNT = "strands.agent.invocation_count"
4+
STRANDS_AGENT_EVENT_LOOP_CYCLE_COUNT = "strands.agent.event_loop.cycle_count"
5+
STRANDS_AGENT_EVENT_LOOP_START_CYCLE = "strands.agent.event_loop.start_cycle"
6+
STRANDS_AGENT_EVENT_LOOP_END_CYCLE = "strands.agent.event_loop.end_cycle"
7+
STRANDS_AGENT_EVENT_LOOP_CYCLE_DURATION = "strands.agent.event_loop.cycle_duration"
8+
STRANDS_AGENT_TOOL_CALL_COUNT = "strands.agent.tool.call_count"
9+
STRANDS_AGENT_TOOL_SUCCESS_COUNT = "strands.agent.tool.success_count"
10+
STRANDS_AGENT_TOOL_ERROR_COUNT = "strands.agent.tool.error_count"
11+
STRANDS_AGENT_TOOL_DURATION = "strands.agent.tool.duration"

src/strands/tools/executor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from opentelemetry import trace
99

10+
from ..telemetry import MetricsClient
1011
from ..telemetry.metrics import EventLoopMetrics, Trace
1112
from ..telemetry.tracer import get_tracer
1213
from ..tools.tools import InvalidToolUseNameException, validate_tool_use
@@ -51,6 +52,7 @@ def _handle_tool_execution(tool: ToolUse) -> Tuple[bool, Optional[ToolResult]]:
5152

5253
tracer = get_tracer()
5354
tool_call_span = tracer.start_tool_call_span(tool, parent_span)
55+
metrics_client = MetricsClient()
5456

5557
try:
5658
if "toolUseId" not in tool or tool["toolUseId"] not in invalid_tool_use_ids:
@@ -64,7 +66,9 @@ def _handle_tool_execution(tool: ToolUse) -> Tuple[bool, Optional[ToolResult]]:
6466

6567
tool_duration = time.time() - tool_start_time
6668
message = Message(role="user", content=[{"toolResult": result}])
67-
event_loop_metrics.add_tool_usage(tool, tool_duration, tool_trace, tool_success, message)
69+
event_loop_metrics.add_tool_usage(
70+
tool, tool_duration, tool_trace, tool_success, message, metrics_client
71+
)
6872
cycle_trace.add_child(tool_trace)
6973

7074
if tool_call_span:

tests/strands/telemetry/test_metrics.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,19 @@ def test_trace_end(mock_time, end_time, trace):
124124
@pytest.fixture
125125
def mock_get_meter_provider():
126126
with mock.patch("strands.telemetry.metrics.metrics_api.get_meter_provider") as mock_get_meter_provider:
127+
MetricsClient._instance = None
127128
meter_provider_mock = mock.MagicMock(spec=MeterProvider)
128-
mock_get_meter_provider.return_value = meter_provider_mock
129129

130130
mock_meter = mock.MagicMock()
131+
mock_create_counter = mock.MagicMock()
132+
mock_meter.create_counter.return_value = mock_create_counter
133+
134+
mock_create_histogram = mock.MagicMock()
135+
mock_meter.create_histogram.return_value = mock_create_histogram
131136
meter_provider_mock.get_meter.return_value = mock_meter
132137

138+
mock_get_meter_provider.return_value = meter_provider_mock
139+
133140
yield mock_get_meter_provider
134141

135142

@@ -190,11 +197,14 @@ def test_trace_to_dict(trace):
190197

191198

192199
@pytest.mark.parametrize("success", [True, False])
193-
def test_tool_metrics_add_call(success, tool, tool_metrics):
200+
def test_tool_metrics_add_call(success, tool, tool_metrics, mock_get_meter_provider):
194201
tool = dict(tool, **{"name": "updated"})
195202
duration = 1
196203

197-
tool_metrics.add_call(tool, duration, success)
204+
metrics_client = MetricsClient()
205+
attributes = {"foo": "bar"}
206+
207+
tool_metrics.add_call(tool, duration, success, metrics_client, attributes=attributes)
198208

199209
tru_attrs = dataclasses.asdict(tool_metrics)
200210
exp_attrs = {
@@ -205,21 +215,30 @@ def test_tool_metrics_add_call(success, tool, tool_metrics):
205215
"total_time": duration,
206216
}
207217

218+
mock_get_meter_provider.return_value.get_meter.assert_called()
219+
metrics_client.tool_call_count.add.assert_called_with(1, attributes=attributes)
220+
metrics_client.tool_duration.record.assert_called_with(duration, attributes=attributes)
221+
if success:
222+
metrics_client.tool_success_count.add.assert_called_with(1, attributes=attributes)
208223
assert tru_attrs == exp_attrs
209224

210225

211226
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
212227
@unittest.mock.patch.object(strands.telemetry.metrics.uuid, "uuid4")
213-
def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metrics):
228+
def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metrics, mock_get_meter_provider):
214229
mock_time.return_value = 1
215230
mock_uuid4.return_value = "i1"
216231

217-
tru_start_time, tru_cycle_trace = event_loop_metrics.start_cycle()
232+
metrics_client = MetricsClient()
233+
234+
tru_start_time, tru_cycle_trace = event_loop_metrics.start_cycle(metrics_client)
218235
exp_start_time, exp_cycle_trace = 1, strands.telemetry.metrics.Trace("Cycle 1")
219236

220237
tru_attrs = {"cycle_count": event_loop_metrics.cycle_count, "traces": event_loop_metrics.traces}
221238
exp_attrs = {"cycle_count": 1, "traces": [tru_cycle_trace]}
222239

240+
mock_get_meter_provider.return_value.get_meter.assert_called()
241+
metrics_client.event_loop_cycle_count.add.assert_called()
223242
assert (
224243
tru_start_time == exp_start_time
225244
and tru_cycle_trace.to_dict() == exp_cycle_trace.to_dict()
@@ -228,10 +247,11 @@ def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metric
228247

229248

230249
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
231-
def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics):
250+
def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics, mock_get_meter_provider):
232251
mock_time.return_value = 1
252+
metrics_client = MetricsClient()
233253

234-
event_loop_metrics.end_cycle(start_time=0, cycle_trace=trace)
254+
event_loop_metrics.end_cycle(start_time=0, cycle_trace=trace, metrics_client=metrics_client)
235255

236256
tru_cycle_durations = event_loop_metrics.cycle_durations
237257
exp_cycle_durations = [1]
@@ -243,16 +263,22 @@ def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics):
243263

244264
assert tru_trace_end_time == exp_trace_end_time
245265

266+
mock_get_meter_provider.return_value.get_meter.assert_called()
267+
metrics_client.event_loop_end_cycle.add.assert_called_with(1)
268+
metrics_client.event_loop_cycle_duration.record.assert_called()
269+
246270

247271
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
248-
def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_metrics):
272+
def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_metrics, mock_get_meter_provider):
273+
metrics_client = MetricsClient()
249274
mock_time.return_value = 1
250-
251275
duration = 1
252276
success = True
253277
message = {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "tool_name": "tool1"}}]}
254278

255-
event_loop_metrics.add_tool_usage(tool, duration, trace, success, message)
279+
event_loop_metrics.add_tool_usage(tool, duration, trace, success, message, metrics_client)
280+
281+
mock_get_meter_provider.return_value.get_meter.assert_called()
256282

257283
tru_event_loop_metrics_attrs = {"tool_metrics": event_loop_metrics.tool_metrics}
258284
exp_event_loop_metrics_attrs = {
@@ -312,12 +338,13 @@ def test_event_loop_metrics_update_metrics(metrics, event_loop_metrics):
312338
assert tru_metrics == exp_metrics
313339

314340

315-
def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics):
341+
def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics, mock_get_meter_provider):
316342
duration = 1
317343
success = True
318344
message = {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "tool_name": "tool1"}}]}
345+
metrics_client = MetricsClient()
319346

320-
event_loop_metrics.add_tool_usage(tool, duration, trace, success, message)
347+
event_loop_metrics.add_tool_usage(tool, duration, trace, success, message, metrics_client)
321348

322349
tru_summary = event_loop_metrics.get_summary()
323350
exp_summary = {

0 commit comments

Comments
 (0)
0