[Misc] Add vLLM version getter to utils (#5098)
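
Move the version string out of vllm/__init__.py into a new single-source module, vllm/version.py. setup.py now reads the version from that file, the package __init__ re-exports it (adding "__version__" to __all__), and the engine logger, OpenAI API server, batch runner, and usage reporting import it directly as VLLM_VERSION instead of reaching through `import vllm`.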
parent a65634d3ae
commit 03dccc886e

setup.py
@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:
 
 
 def get_vllm_version() -> str:
-    version = find_version(get_path("vllm", "__init__.py"))
+    version = find_version(get_path("vllm", "version.py"))
 
     if _is_cuda():
         cuda_version = str(get_nvcc_cuda_version())
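
For context, `find_version` (defined just above this hunk in setup.py) extracts the `__version__ = "..."` assignment from whatever file it is pointed at, so retargeting it from vllm/__init__.py to vllm/version.py keeps the wheel version in sync with the new module. A rough sketch of such a reader, for illustration only (not necessarily vLLM's exact implementation):

import re

def find_version(filepath: str) -> str:
    # Pull the version out of a line like: __version__ = "0.5.0"
    with open(filepath) as f:
        match = re.search(r'^__version__\s*=\s*["\']([^"\']+)["\']',
                          f.read(), re.MULTILINE)
    if match is None:
        raise RuntimeError(f"Unable to find version string in {filepath}.")
    return match.group(1)
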
@@ -12,9 +12,10 @@ from vllm.outputs import (CompletionOutput, EmbeddingOutput,
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 
-__version__ = "0.5.0"
+from .version import __version__
 
 __all__ = [
+    "__version__",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",
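
Because the package __init__ re-exports the name and lists it in __all__, code that reads `vllm.__version__` keeps working unchanged; only the definition moves. A quick sanity check, as a sketch:

import vllm
from vllm.version import __version__

assert vllm.__version__ == __version__   # same string, "0.5.0" at this commit
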
@@ -6,7 +6,6 @@ from typing import Type, TypeVar, Union
 
 from transformers import GenerationConfig, PreTrainedTokenizer
 
-import vllm
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
                          LoRAConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig, SpeculativeConfig,
@@ -38,6 +37,7 @@ from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup,
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
 from vllm.utils import Counter
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
 _LOCAL_LOGGING_INTERVAL_SEC = 5
@@ -169,7 +169,7 @@ class LLMEngine:
             "enforce_eager=%s, kv_cache_dtype=%s, "
             "quantization_param_path=%s, device_config=%s, "
             "decoding_config=%r, seed=%d, served_model_name=%s)",
-            vllm.__version__,
+            VLLM_VERSION,
             model_config.model,
             speculative_config,
             model_config.tokenizer,
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
 from prometheus_client import make_asgi_app
 from starlette.routing import Mount
 
-import vllm
 import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -29,6 +28,7 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
+from vllm.version import __version__ as VLLM_VERSION
 
 TIMEOUT_KEEP_ALIVE = 5  # seconds
 
@@ -93,7 +93,7 @@ async def show_available_models():
 
 
 @app.get("/version")
 async def show_version():
-    ver = {"version": vllm.__version__}
+    ver = {"version": VLLM_VERSION}
     return JSONResponse(content=ver)
 
 
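
With the endpoint above switched to VLLM_VERSION, a client still receives the same JSON payload. A minimal sketch of querying it (the host/port and the `requests` dependency are assumptions, not part of this commit):

import requests

resp = requests.get("http://localhost:8000/version")
print(resp.json())  # {"version": "0.5.0"} for this release
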
@@ -174,7 +174,7 @@ if __name__ == "__main__":
            raise ValueError(f"Invalid middleware {middleware}. "
                             f"Must be a function or a class.")
 
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
     if args.served_model_name is not None:
@@ -5,7 +5,6 @@ from io import StringIO
 
 import aiohttp
 
-import vllm
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
@@ -15,6 +14,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import random_uuid
+from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
 
@@ -135,7 +135,7 @@ async def main(args):
 if __name__ == "__main__":
     args = parse_args()
 
-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
     asyncio.run(main(args))
@@ -16,6 +16,7 @@ import requests
 import torch
 
 import vllm.envs as envs
+from vllm.version import __version__ as VLLM_VERSION
 
 _config_home = envs.VLLM_CONFIG_ROOT
 _USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
@@ -163,9 +164,8 @@ class UsageMessage:
         ])
 
         # vLLM information
-        import vllm  # delayed import to prevent circular import
         self.context = usage_context.value
-        self.vllm_version = vllm.__version__
+        self.vllm_version = VLLM_VERSION
         self.model_architecture = model_architecture
 
         # Metadata

vllm/version.py (new file)
@@ -0,0 +1 @@
+__version__ = "0.5.0"
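
The new module is a single assignment and imports nothing, which is what makes the top-level `from vllm.version import __version__ as VLLM_VERSION` imports in the hunks above safe (the delayed import removed from the usage-reporting code existed only to dodge a circular import). A minimal usage sketch of the pattern this commit adopts:

from vllm.version import __version__ as VLLM_VERSION

print("vLLM version:", VLLM_VERSION)   # "0.5.0" at this commit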