[Misc] Add vLLM version getter to utils (#5098)

Author: Cyrus Leung, 2024-06-14 02:21:39 +08:00 (committed by GitHub)
Parent: a65634d3ae
Commit: 03dccc886e
7 changed files with 13 additions and 11 deletions

setup.py

@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
 def get_vllm_version() -> str:
-    version = find_version(get_path("vllm", "__init__.py"))
+    version = find_version(get_path("vllm", "version.py"))
     if _is_cuda():
         cuda_version = str(get_nvcc_cuda_version())
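For reference, find_version is setup.py's helper that pulls the __version__ = "..." assignment out of a file, and get_path resolves a path under the repository root. The sketch below is a minimal, assumed reconstruction of those two helpers, not a verbatim copy of setup.py:

    import os
    import re

    ROOT_DIR = os.path.dirname(__file__)

    def get_path(*filepath) -> str:
        # Resolve a path relative to the repository root (assumed layout).
        return os.path.join(ROOT_DIR, *filepath)

    def find_version(filepath: str) -> str:
        # Scan the file for a line of the form: __version__ = "x.y.z"
        with open(filepath) as fp:
            match = re.search(r'^__version__ = ["\']([^"\']*)["\']',
                              fp.read(), re.M)
        if match:
            return match.group(1)
        raise RuntimeError("Unable to find version string.")

With this change, the version is read from vllm/version.py instead of vllm/__init__.py.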

vllm/__init__.py

@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
-__version__ = "0.5.0"
+from .version import __version__
 __all__ = [
+    "__version__",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",

vllm/engine/llm_engine.py

@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
 from transformers import GenerationConfig, PreTrainedTokenizer
-import vllm
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
                          LoRAConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig, SpeculativeConfig,
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
 from vllm.utils import Counter
+from vllm.version import __version__ as VLLM_VERSION
 logger = init_logger(__name__)
 _LOCAL_LOGGING_INTERVAL_SEC = 5
@@ -169,7 +169,7 @@ class LLMEngine:
             "enforce_eager=%s, kv_cache_dtype=%s, "
             "quantization_param_path=%s, device_config=%s, "
             "decoding_config=%r, seed=%d, served_model_name=%s)",
-            vllm.__version__,
+            VLLM_VERSION,
             model_config.model,
             speculative_config,
             model_config.tokenizer,

vllm/entrypoints/openai/api_server.py

@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
 from prometheus_client import make_asgi_app
 from starlette.routing import Mount
-import vllm
 import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
+from vllm.version import __version__ as VLLM_VERSION
 TIMEOUT_KEEP_ALIVE = 5  # seconds
@@ -93,7 +93,7 @@ async def show_available_models():
 @app.get("/version")
 async def show_version():
-    ver = {"version": vllm.__version__}
+    ver = {"version": VLLM_VERSION}
     return JSONResponse(content=ver)
@@ -174,7 +174,7 @@ if __name__ == "__main__":
            raise ValueError(f"Invalid middleware {middleware}. "
                             f"Must be a function or a class.")
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
     if args.served_model_name is not None:
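The /version endpoint above simply serializes VLLM_VERSION, so a client can ask a running server which package version it is serving. A small illustrative client; the endpoint path and response shape come from the diff, while the host and port (the server's usual localhost:8000 default) are assumptions:

    import requests

    # Query the version endpoint served by the OpenAI-compatible API server.
    resp = requests.get("http://localhost:8000/version")
    resp.raise_for_status()
    print(resp.json())  # e.g. {"version": "0.5.0"}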

vllm/entrypoints/openai/run_batch.py

@@ -5,7 +5,6 @@ from io import StringIO
 import aiohttp
-import vllm
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import random_uuid
+from vllm.version import __version__ as VLLM_VERSION
 logger = init_logger(__name__)
@@ -135,7 +135,7 @@ async def main(args):
 if __name__ == "__main__":
     args = parse_args()
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
     asyncio.run(main(args))

vllm/usage/usage_lib.py

@@ -16,6 +16,7 @@ import requests
 import torch
 import vllm.envs as envs
+from vllm.version import __version__ as VLLM_VERSION
 _config_home = envs.VLLM_CONFIG_ROOT
 _USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
@@ -163,9 +164,8 @@ class UsageMessage:
         ])
         # vLLM information
-        import vllm  # delayed import to prevent circular import
         self.context = usage_context.value
-        self.vllm_version = vllm.__version__
+        self.vllm_version = VLLM_VERSION
         self.model_architecture = model_architecture
         # Metadata

vllm/version.py (new file)

@@ -0,0 +1 @@
+__version__ = "0.5.0"
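With version.py as the single source of truth, the version string can be read without importing the full vllm package, which is the circular-import problem the deleted "delayed import" in usage_lib.py worked around. A minimal sketch of the pattern the changed modules now follow (the print call is only illustrative, not part of this commit):

    # Import the version string from the standalone module instead of the
    # package root; this mirrors the imports added in the diff above.
    from vllm.version import __version__ as VLLM_VERSION

    print(f"vLLM version: {VLLM_VERSION}")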