Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- logs: add exception support to Logger emit and LogRecord attributes
([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
- Drop Python 3.9 support
([#5076](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/5076))
([#5076](https://github.com/open-telemetry/opentelemetry-python/pull/5076))
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers
([#5084](https://github.com/open-telemetry/opentelemetry-python/issues/5084))

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.



## Version 1.41.0/0.62b0 (2026-04-09)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,34 @@
"""


# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
# Spec-advised explicit bucket boundaries (seconds) for
# gen_ai.client.operation.duration: a power-of-two geometric series from
# 10 ms to ~82 s. The server request-duration helper shares these same
# advised boundaries.
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS: Final = (
    0.01,
    0.02,
    0.04,
    0.08,
    0.16,
    0.32,
    0.64,
    1.28,
    2.56,
    5.12,
    10.24,
    20.48,
    40.96,
    81.92,
)


def create_gen_ai_client_operation_duration(meter: Meter) -> Histogram:
    """GenAI operation duration"""
    # Advise the spec-defined latency boundaries explicitly; otherwise the
    # SDK would fall back to its generic default histogram buckets.
    boundaries = list(_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS)
    return meter.create_histogram(
        name=GEN_AI_CLIENT_OPERATION_DURATION,
        unit="s",
        description="GenAI operation duration.",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand Down Expand Up @@ -61,10 +83,15 @@ def create_gen_ai_client_token_usage(meter: Meter) -> Histogram:

def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
    """Generative AI server request duration such as time-to-last byte or last output token"""
    # The semconv spec advises the same latency-style boundaries here as
    # for gen_ai.client.operation.duration, so that constant is reused.
    boundaries = list(_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS)
    return meter.create_histogram(
        name=GEN_AI_SERVER_REQUEST_DURATION,
        unit="s",
        description="Generative AI server request duration such as time-to-last byte or last output token.",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand All @@ -78,12 +105,33 @@ def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
"""


# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_per_output_token
# Spec-advised explicit bucket boundaries (seconds) for
# gen_ai.server.time_per_output_token: finer-grained than the operation
# duration buckets because per-token latencies cluster well under a second.
_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS: Final = (
    0.01,
    0.025,
    0.05,
    0.075,
    0.1,
    0.15,
    0.2,
    0.3,
    0.4,
    0.5,
    0.75,
    1.0,
    2.5,
)


def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
    """Time per output token generated after the first token for successful responses"""
    # Advise the spec's dedicated per-output-token boundaries so the SDK
    # does not apply its generic default buckets to this instrument.
    boundaries = list(_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS)
    return meter.create_histogram(
        name=GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN,
        unit="s",
        description="Time per output token generated after the first token for successful responses.",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand All @@ -97,8 +145,13 @@ def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:

# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_to_first_token
# Spec-advised explicit bucket boundaries (seconds) for
# gen_ai.server.time_to_first_token. TTFT needs sub-10-ms resolution, so
# the spec advises a dedicated set starting at 1 ms rather than the
# coarser client-operation-duration boundaries (which start at 10 ms).
_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS: Final = (
    0.001,
    0.005,
    0.01,
    0.02,
    0.04,
    0.06,
    0.08,
    0.1,
    0.25,
    0.5,
    0.75,
    1.0,
    2.5,
    5.0,
    7.5,
    10.0,
)


def create_gen_ai_server_time_to_first_token(meter: Meter) -> Histogram:
    """Time to generate first token for successful responses"""
    # Fix: previously this reused _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
    # whose smallest boundary is 10 ms — collapsing most realistic TTFT
    # measurements into the first bucket. The semconv spec advises a
    # dedicated boundary set for this metric.
    return meter.create_histogram(
        name=GEN_AI_SERVER_TIME_TO_FIRST_TOKEN,
        description="Time to generate first token for successful responses.",
        unit="s",
        explicit_bucket_boundaries_advisory=list(
            _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS
        ),
    )