[CI/Build] Pin OpenTelemetry versions and make errors clearer (#7266)

[CI/Build] Pin OpenTelemetry versions and make availability errors clearer (#7266)
This commit is contained in:
Ronen Schaffer 2024-08-20 20:02:21 +03:00 committed by GitHub
parent c42590f97a
commit 2aa00d59ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 19 deletions

View File

@ -111,10 +111,10 @@ steps:
commands: commands:
- pytest -v -s metrics - pytest -v -s metrics
- "pip install \ - "pip install \
opentelemetry-sdk \ 'opentelemetry-sdk>=1.26.0,<1.27.0' \
opentelemetry-api \ 'opentelemetry-api>=1.26.0,<1.27.0' \
opentelemetry-exporter-otlp \ 'opentelemetry-exporter-otlp>=1.26.0,<1.27.0' \
opentelemetry-semantic-conventions-ai" 'opentelemetry-semantic-conventions-ai>=0.4.1,<0.5.0'"
- pytest -v -s tracing - pytest -v -s tracing
##### fast check tests ##### ##### fast check tests #####

View File

@ -3,10 +3,10 @@
1. Install OpenTelemetry packages: 1. Install OpenTelemetry packages:
``` ```
pip install \ pip install \
opentelemetry-sdk \ 'opentelemetry-sdk>=1.26.0,<1.27.0' \
opentelemetry-api \ 'opentelemetry-api>=1.26.0,<1.27.0' \
opentelemetry-exporter-otlp \ 'opentelemetry-exporter-otlp>=1.26.0,<1.27.0' \
opentelemetry-semantic-conventions-ai 'opentelemetry-semantic-conventions-ai>=0.4.1,<0.5.0'
``` ```
1. Start Jaeger in a docker container: 1. Start Jaeger in a docker container:

View File

@ -12,7 +12,7 @@ from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.model_executor.models import ModelRegistry from vllm.model_executor.models import ModelRegistry
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.tracing import is_otel_installed from vllm.tracing import is_otel_available, otel_import_error_traceback
from vllm.transformers_utils.config import get_config, get_hf_text_config from vllm.transformers_utils.config import get_config, get_hf_text_config
from vllm.utils import (STR_NOT_IMPL_ENC_DEC_CUDAGRAPH, GiB_bytes, from vllm.utils import (STR_NOT_IMPL_ENC_DEC_CUDAGRAPH, GiB_bytes,
cuda_device_count_stateless, get_cpu_memory, is_cpu, cuda_device_count_stateless, get_cpu_memory, is_cpu,
@ -1721,9 +1721,11 @@ class ObservabilityConfig:
collect_model_execute_time: bool = False collect_model_execute_time: bool = False
def __post_init__(self): def __post_init__(self):
if not is_otel_installed() and self.otlp_traces_endpoint is not None: if not is_otel_available() and self.otlp_traces_endpoint is not None:
raise ValueError("OpenTelemetry packages must be installed before " raise ValueError(
"configuring 'otlp_traces_endpoint'") "OpenTelemetry is not available. Unable to configure "
"'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
f"installed. Original error:\n{otel_import_error_traceback}")
if ((self.collect_model_forward_time if ((self.collect_model_forward_time
or self.collect_model_execute_time) or self.collect_model_execute_time)

View File

@ -8,7 +8,8 @@ TRACE_HEADERS = ["traceparent", "tracestate"]
logger = init_logger(__name__) logger = init_logger(__name__)
_is_otel_installed = False _is_otel_imported = False
otel_import_error_traceback: Optional[str] = None
try: try:
from opentelemetry.context.context import Context from opentelemetry.context.context import Context
from opentelemetry.sdk.environment_variables import ( from opentelemetry.sdk.environment_variables import (
@ -19,8 +20,14 @@ try:
from opentelemetry.trace import SpanKind, Tracer, set_tracer_provider from opentelemetry.trace import SpanKind, Tracer, set_tracer_provider
from opentelemetry.trace.propagation.tracecontext import ( from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator) TraceContextTextMapPropagator)
_is_otel_installed = True _is_otel_imported = True
except ImportError: except ImportError:
# Capture and format traceback to provide detailed context for the import
# error. Only the string representation of the error is retained to avoid
# memory leaks.
# See https://github.com/vllm-project/vllm/pull/7266#discussion_r1707395458
import traceback
otel_import_error_traceback = traceback.format_exc()
class Context: # type: ignore class Context: # type: ignore
pass pass
@ -35,14 +42,17 @@ except ImportError:
pass pass
def is_otel_installed() -> bool: def is_otel_available() -> bool:
return _is_otel_installed return _is_otel_imported
def init_tracer(instrumenting_module_name: str, def init_tracer(instrumenting_module_name: str,
otlp_traces_endpoint: str) -> Optional[Tracer]: otlp_traces_endpoint: str) -> Optional[Tracer]:
assert is_otel_installed(), ("OpenTelemetry packages must be installed " if not is_otel_available():
"prior to initializing a tracer") raise ValueError(
"OpenTelemetry is not available. Unable to initialize "
"a tracer. Ensure OpenTelemetry packages are installed. "
f"Original error:\n{otel_import_error_traceback}")
trace_provider = TracerProvider() trace_provider = TracerProvider()
span_exporter = get_span_exporter(otlp_traces_endpoint) span_exporter = get_span_exporter(otlp_traces_endpoint)
@ -70,7 +80,7 @@ def get_span_exporter(endpoint):
def extract_trace_context( def extract_trace_context(
headers: Optional[Mapping[str, str]]) -> Optional[Context]: headers: Optional[Mapping[str, str]]) -> Optional[Context]:
if is_otel_installed(): if is_otel_available():
headers = headers or {} headers = headers or {}
return TraceContextTextMapPropagator().extract(headers) return TraceContextTextMapPropagator().extract(headers)
else: else: