[Misc] Add vLLM version getter to utils (#5098)

Cyrus Leung 2024-06-14 02:21:39 +08:00 committed by GitHub
parent a65634d3ae
commit 03dccc886e
7 changed files with 13 additions and 11 deletions

setup.py

@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
 
 
 def get_vllm_version() -> str:
-    version = find_version(get_path("vllm", "__init__.py"))
+    version = find_version(get_path("vllm", "version.py"))
 
     if _is_cuda():
         cuda_version = str(get_nvcc_cuda_version())
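
Note: find_version itself is unchanged by this commit; only the file it reads moves. For context, a minimal sketch of the conventional regex-based implementation that the call site implies (the body below is assumed, not taken from this diff):

    import re

    def find_version(filepath: str) -> str:
        # Scan the file for a line of the form: __version__ = "0.5.0"
        # (conventional pattern; the actual vLLM setup.py may differ).
        with open(filepath) as f:
            match = re.search(r'^__version__ = ["\']([^"\']+)["\']',
                              f.read(), re.MULTILINE)
        if match:
            return match.group(1)
        raise RuntimeError("Unable to find version string.")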

vllm/__init__.py

@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 
-__version__ = "0.5.0"
+from .version import __version__
 
 __all__ = [
+    "__version__",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",

vllm/engine/llm_engine.py

@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
 from transformers import GenerationConfig, PreTrainedTokenizer
 
-import vllm
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
                          LoRAConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig, SpeculativeConfig,
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
 from vllm.utils import Counter
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
 
 _LOCAL_LOGGING_INTERVAL_SEC = 5
@@ -169,7 +169,7 @@ class LLMEngine:
             "enforce_eager=%s, kv_cache_dtype=%s, "
             "quantization_param_path=%s, device_config=%s, "
             "decoding_config=%r, seed=%d, served_model_name=%s)",
-            vllm.__version__,
+            VLLM_VERSION,
             model_config.model,
             speculative_config,
             model_config.tokenizer,

vllm/entrypoints/openai/api_server.py

@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
 from prometheus_client import make_asgi_app
 from starlette.routing import Mount
 
-import vllm
 import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
+from vllm.version import __version__ as VLLM_VERSION
 
 TIMEOUT_KEEP_ALIVE = 5  # seconds
@@ -93,7 +93,7 @@ async def show_available_models():
 
 @app.get("/version")
 async def show_version():
-    ver = {"version": vllm.__version__}
+    ver = {"version": VLLM_VERSION}
     return JSONResponse(content=ver)
@@ -174,7 +174,7 @@ if __name__ == "__main__":
             raise ValueError(f"Invalid middleware {middleware}. "
                              f"Must be a function or a class.")
 
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
     if args.served_model_name is not None:
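
The /version endpoint keeps its response shape and now serves the same single-source string. An example client call, assuming a server running on the default localhost:8000:

    import requests

    # Ask a running vLLM OpenAI-compatible server for its version.
    resp = requests.get("http://localhost:8000/version")
    print(resp.json())  # e.g. {"version": "0.5.0"}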

vllm/entrypoints/openai/run_batch.py

@@ -5,7 +5,6 @@ from io import StringIO
 import aiohttp
 
-import vllm
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import random_uuid
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
@@ -135,7 +135,7 @@ async def main(args):
 
 if __name__ == "__main__":
     args = parse_args()
 
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
     asyncio.run(main(args))

vllm/usage/usage_lib.py

@@ -16,6 +16,7 @@ import requests
 import torch
 
 import vllm.envs as envs
+from vllm.version import __version__ as VLLM_VERSION
 
 _config_home = envs.VLLM_CONFIG_ROOT
 _USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
@@ -163,9 +164,8 @@ class UsageMessage:
         ])
 
         # vLLM information
-        import vllm  # delayed import to prevent circular import
         self.context = usage_context.value
-        self.vllm_version = vllm.__version__
+        self.vllm_version = VLLM_VERSION
         self.model_architecture = model_architecture
 
         # Metadata
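
Note on the removed delayed import: vllm.usage.usage_lib is imported while the vllm package itself is still initializing, so a module-level `import vllm` there would re-enter a partially initialized package. vllm/version.py imports nothing, which is what makes the new top-level `from vllm.version import ...` cycle-free.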

vllm/version.py (new file)

@@ -0,0 +1 @@
+__version__ = "0.5.0"
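
With vllm/version.py as the single source of truth, build time (setup.py) and runtime now read the same line. A quick check of that invariant, assuming it is run from a repo checkout with vllm installed:

    import re
    from pathlib import Path

    import vllm

    # Read the version string the same way setup.py's find_version does.
    on_disk = re.search(r'__version__ = "([^"]+)"',
                        Path("vllm/version.py").read_text()).group(1)
    assert on_disk == vllm.__version__  # both "0.5.0" at this commit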