[Misc] Add vLLM version getter to utils (#5098)
This commit is contained in:
parent
a65634d3ae
commit
03dccc886e
2
setup.py
2
setup.py
@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
|
||||
|
||||
|
||||
def get_vllm_version() -> str:
|
||||
version = find_version(get_path("vllm", "__init__.py"))
|
||||
version = find_version(get_path("vllm", "version.py"))
|
||||
|
||||
if _is_cuda():
|
||||
cuda_version = str(get_nvcc_cuda_version())
|
||||
|
@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.sampling_params import SamplingParams
|
||||
|
||||
__version__ = "0.5.0"
|
||||
from .version import __version__
|
||||
|
||||
__all__ = [
|
||||
"__version__",
|
||||
"LLM",
|
||||
"ModelRegistry",
|
||||
"PromptStrictInputs",
|
||||
|
@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
|
||||
|
||||
from transformers import GenerationConfig, PreTrainedTokenizer
|
||||
|
||||
import vllm
|
||||
from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
|
||||
LoRAConfig, ModelConfig, ParallelConfig,
|
||||
SchedulerConfig, SpeculativeConfig,
|
||||
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
|
||||
from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
|
||||
usage_message)
|
||||
from vllm.utils import Counter
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
logger = init_logger(__name__)
|
||||
_LOCAL_LOGGING_INTERVAL_SEC = 5
|
||||
@@ -169,7 +169,7 @@ class LLMEngine:
|
||||
"enforce_eager=%s, kv_cache_dtype=%s, "
|
||||
"quantization_param_path=%s, device_config=%s, "
|
||||
"decoding_config=%r, seed=%d, served_model_name=%s)",
|
||||
vllm.__version__,
|
||||
VLLM_VERSION,
|
||||
model_config.model,
|
||||
speculative_config,
|
||||
model_config.tokenizer,
|
||||
|
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
|
||||
from prometheus_client import make_asgi_app
|
||||
from starlette.routing import Mount
|
||||
|
||||
import vllm
|
||||
import vllm.envs as envs
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
||||
from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
|
||||
from vllm.logger import init_logger
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
||||
|
||||
@@ -93,7 +93,7 @@ async def show_available_models():
|
||||
|
||||
@app.get("/version")
|
||||
async def show_version():
|
||||
ver = {"version": vllm.__version__}
|
||||
ver = {"version": VLLM_VERSION}
|
||||
return JSONResponse(content=ver)
|
||||
|
||||
|
||||
@@ -174,7 +174,7 @@ if __name__ == "__main__":
|
||||
raise ValueError(f"Invalid middleware {middleware}. "
|
||||
f"Must be a function or a class.")
|
||||
|
||||
logger.info("vLLM API server version %s", vllm.__version__)
|
||||
logger.info("vLLM API server version %s", VLLM_VERSION)
|
||||
logger.info("args: %s", args)
|
||||
|
||||
if args.served_model_name is not None:
|
||||
|
@@ -5,7 +5,6 @@ from io import StringIO
|
||||
|
||||
import aiohttp
|
||||
|
||||
import vllm
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
|
||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||
from vllm.entrypoints.openai.protocol import (BatchRequestInput,
|
||||
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||
from vllm.logger import init_logger
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import random_uuid
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -135,7 +135,7 @@ async def main(args):
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
|
||||
logger.info("vLLM API server version %s", vllm.__version__)
|
||||
logger.info("vLLM API server version %s", VLLM_VERSION)
|
||||
logger.info("args: %s", args)
|
||||
|
||||
asyncio.run(main(args))
|
||||
|
@@ -16,6 +16,7 @@ import requests
|
||||
import torch
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
_config_home = envs.VLLM_CONFIG_ROOT
|
||||
_USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
|
||||
@@ -163,9 +164,8 @@ class UsageMessage:
|
||||
])
|
||||
|
||||
# vLLM information
|
||||
import vllm # delayed import to prevent circular import
|
||||
self.context = usage_context.value
|
||||
self.vllm_version = vllm.__version__
|
||||
self.vllm_version = VLLM_VERSION
|
||||
self.model_architecture = model_architecture
|
||||
|
||||
# Metadata
|
||||
|
1
vllm/version.py
Normal file
1
vllm/version.py
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "0.5.0"
|
Loading…
x
Reference in New Issue
Block a user