Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- logs: add exception support to Logger emit and LogRecord attributes
([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
- Drop Python 3.9 support
([#5076](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/5076))
([#5076](https://github.com/open-telemetry/opentelemetry-python/pull/5076))
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers
([#5084](https://github.com/open-telemetry/opentelemetry-python/issues/5084))

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.



## Version 1.41.0/0.62b0 (2026-04-09)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,34 @@
"""


# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
# Spec-advised explicit bucket boundaries (seconds) for
# gen_ai.client.operation.duration: a power-of-two geometric series from
# 10 ms to ~82 s. The server request-duration helper shares these same
# advised boundaries.
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS: Final = (
    0.01,
    0.02,
    0.04,
    0.08,
    0.16,
    0.32,
    0.64,
    1.28,
    2.56,
    5.12,
    10.24,
    20.48,
    40.96,
    81.92,
)


def create_gen_ai_client_operation_duration(meter: Meter) -> Histogram:
    """GenAI operation duration"""
    # Advise the spec-defined latency boundaries explicitly; otherwise the
    # SDK would fall back to its generic default histogram buckets.
    boundaries = list(_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS)
    return meter.create_histogram(
        name=GEN_AI_CLIENT_OPERATION_DURATION,
        unit="s",
        description="GenAI operation duration.",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand Down Expand Up @@ -61,10 +83,15 @@ def create_gen_ai_client_token_usage(meter: Meter) -> Histogram:

def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
    """Generative AI server request duration such as time-to-last byte or last output token"""
    # The semconv spec advises the same latency-style boundaries here as
    # for gen_ai.client.operation.duration, so that constant is reused.
    boundaries = list(_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS)
    return meter.create_histogram(
        name=GEN_AI_SERVER_REQUEST_DURATION,
        unit="s",
        description="Generative AI server request duration such as time-to-last byte or last output token.",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand All @@ -78,12 +105,33 @@ def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
"""


# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_per_output_token
# Spec-advised explicit bucket boundaries (seconds) for
# gen_ai.server.time_per_output_token: finer-grained than the operation
# duration buckets because per-token latencies cluster well under a second.
_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS: Final = (
    0.01,
    0.025,
    0.05,
    0.075,
    0.1,
    0.15,
    0.2,
    0.3,
    0.4,
    0.5,
    0.75,
    1.0,
    2.5,
)


def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
    """Time per output token generated after the first token for successful responses"""
    # Advise the spec's dedicated per-output-token boundaries so the SDK
    # does not apply its generic default buckets to this instrument.
    boundaries = list(_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS)
    return meter.create_histogram(
        name=GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN,
        unit="s",
        description="Time per output token generated after the first token for successful responses.",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand All @@ -97,8 +145,13 @@ def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:

# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_to_first_token
# Spec-advised explicit bucket boundaries (seconds) for
# gen_ai.server.time_to_first_token. TTFT needs sub-10-ms resolution, so
# the spec advises a dedicated set starting at 1 ms rather than the
# coarser client-operation-duration boundaries (which start at 10 ms).
_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS: Final = (
    0.001,
    0.005,
    0.01,
    0.02,
    0.04,
    0.06,
    0.08,
    0.1,
    0.25,
    0.5,
    0.75,
    1.0,
    2.5,
    5.0,
    7.5,
    10.0,
)


def create_gen_ai_server_time_to_first_token(meter: Meter) -> Histogram:
    """Time to generate first token for successful responses"""
    # Fix: previously this reused _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
    # whose smallest boundary is 10 ms — collapsing most realistic TTFT
    # measurements into the first bucket. The semconv spec advises a
    # dedicated boundary set for this metric.
    return meter.create_histogram(
        name=GEN_AI_SERVER_TIME_TO_FIRST_TOKEN,
        description="Time to generate first token for successful responses.",
        unit="s",
        explicit_bucket_boundaries_advisory=list(
            _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS
        ),
    )