2023-02-09 11:26:50 +00:00
|
|
|
import enum
|
2024-01-04 03:30:22 +08:00
|
|
|
import os
|
2023-12-16 21:12:08 -08:00
|
|
|
import socket
|
2023-05-23 21:39:50 -07:00
|
|
|
import uuid
|
2023-09-26 22:27:13 -07:00
|
|
|
from platform import uname
|
2024-01-04 03:30:22 +08:00
|
|
|
from typing import List
|
2023-03-22 04:45:42 +08:00
|
|
|
|
2023-05-09 15:30:12 -07:00
|
|
|
import psutil
|
2023-03-22 04:45:42 +08:00
|
|
|
import torch
|
|
|
|
|
2023-11-23 16:31:19 -08:00
|
|
|
from vllm._C import cuda_utils
|
2023-09-26 22:27:13 -07:00
|
|
|
|
2023-02-09 11:26:50 +00:00
|
|
|
|
|
|
|
class Device(enum.Enum):
    """Kinds of device distinguished by this module: GPU or CPU."""

    # Member values are auto-assigned; only member identity is meaningful.
    GPU = enum.auto()
    CPU = enum.auto()
|
|
|
|
|
|
|
|
|
|
|
|
class Counter:
    """Monotonically increasing integer counter driven by ``next()``.

    Every ``next(counter)`` call returns the current value and then advances
    the counter by one. The counter never raises ``StopIteration``, so when it
    is iterated directly the caller is responsible for stopping.
    """

    def __init__(self, start: int = 0) -> None:
        """Create a counter whose first returned value is ``start``."""
        self.counter = start

    def __iter__(self) -> "Counter":
        """Return ``self`` so the counter satisfies the iterator protocol."""
        return self

    def __next__(self) -> int:
        """Return the current value, then advance the counter by one."""
        current = self.counter
        self.counter += 1
        return current

    def reset(self) -> None:
        """Set the counter back to 0.

        Note that this is always 0, not the ``start`` value given to
        ``__init__``.
        """
        self.counter = 0
|
2023-03-22 04:45:42 +08:00
|
|
|
|
2023-03-29 14:48:56 +08:00
|
|
|
|
2023-12-08 15:16:52 +08:00
|
|
|
def is_hip() -> bool:
    """Return True when the installed PyTorch reports a HIP version.

    ``torch.version.hip`` is ``None`` unless PyTorch was built for ROCm/HIP.
    """
    hip_version = torch.version.hip
    return hip_version is not None
|
|
|
|
|
|
|
|
|
2023-09-26 22:27:13 -07:00
|
|
|
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
    """Returns the maximum shared memory per thread block in bytes.

    Args:
        gpu: Index of the device to query; defaults to device 0.

    Returns:
        The value of the "max shared memory per block (opt-in)" device
        attribute, converted to ``int``.
    """
    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
    # Numeric id of cudaDevAttrMaxSharedMemoryPerBlockOptin: 97 normally,
    # 74 when running on a HIP build.
    if is_hip():
        attribute_id = 74
    else:
        attribute_id = 97
    return int(cuda_utils.get_device_attribute(attribute_id, gpu))
|
|
|
|
|
|
|
|
|
2023-03-29 14:48:56 +08:00
|
|
|
def get_cpu_memory() -> int:
    """Returns the total CPU memory of the node in bytes."""
    memory_stats = psutil.virtual_memory()
    return memory_stats.total
|
2023-05-23 21:39:50 -07:00
|
|
|
|
|
|
|
|
|
|
|
def random_uuid() -> str:
    """Return a freshly generated random UUID (version 4) as 32 hex digits."""
    # ``UUID.hex`` is already a ``str`` with no dashes.
    return uuid.uuid4().hex
|
2023-06-29 15:00:21 -07:00
|
|
|
|
2023-07-03 11:31:55 -07:00
|
|
|
|
2023-06-29 15:00:21 -07:00
|
|
|
def in_wsl() -> bool:
    """Return True when the platform ``uname`` fields mention "microsoft".

    The check is case-insensitive and is used as a heuristic for running
    under the Windows Subsystem for Linux.
    """
    # Reference: https://github.com/microsoft/WSL/issues/4071
    system_description = " ".join(uname()).lower()
    return "microsoft" in system_description
|
2023-12-16 21:12:08 -08:00
|
|
|
|
|
|
|
|
2024-01-04 03:30:22 +08:00
|
|
|
def get_ip() -> str:
    """Return the IPv4 address that this machine's hostname resolves to.

    NOTE(review): the result depends entirely on how the local resolver maps
    the hostname; on some hosts that may be a loopback address. Confirm this
    is acceptable for callers that need a routable address.
    """
    hostname = socket.gethostname()
    return socket.gethostbyname(hostname)
|
|
|
|
|
|
|
|
|
|
|
|
def get_open_port() -> int:
    """Return a TCP port number that the OS reported as free.

    A temporary IPv4 TCP socket is bound to port 0 so the OS picks an unused
    port. The socket is closed before returning, so the port is not reserved
    for the caller.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(("", 0))
        _host, port = probe.getsockname()
    return port
|
2024-01-04 03:30:22 +08:00
|
|
|
|
|
|
|
|
|
|
|
def set_cuda_visible_devices(device_ids: List[int]) -> None:
    """Set the ``CUDA_VISIBLE_DEVICES`` environment variable.

    Args:
        device_ids: Device indices to expose; they are written as a
            comma-separated list in the given order. An empty list yields
            an empty string.
    """
    visible_devices = ",".join(str(device_id) for device_id in device_ids)
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices
|