vllm/cacheflow/utils.py

import enum

import psutil
import torch


class Device(enum.Enum):
    """Enumeration of the device kinds distinguished here: GPU and CPU."""

    # Member values are generated by enum.auto() in definition order.
    GPU = enum.auto()
    CPU = enum.auto()


class Counter:
    """Monotonically increasing integer counter usable with ``next()``.

    Each call to ``next(counter)`` returns the current value and then
    advances the counter by one. The object is its own iterator, so it can
    also be used in ``for`` loops or with ``zip``; it never raises
    ``StopIteration`` by itself.
    """

    def __init__(self, start: int = 0) -> None:
        """Create a counter whose first ``next()`` call returns ``start``."""
        self.counter = start

    def __iter__(self) -> "Counter":
        """Return ``self`` to complete the iterator protocol."""
        return self

    def __next__(self) -> int:
        """Return the current value, then increment the counter by one."""
        current = self.counter
        self.counter += 1
        return current

    def reset(self) -> None:
        """Set the counter back to 0.

        Note that this is always 0, not the ``start`` value passed to the
        constructor.
        """
        self.counter = 0


def get_gpu_memory(gpu: int = 0) -> int:
    """Return the total memory, in bytes, of the CUDA device at index `gpu`."""
    device_props = torch.cuda.get_device_properties(gpu)
    return device_props.total_memory


def get_cpu_memory() -> int:
    """Return the node's total CPU memory in bytes, as reported by psutil."""
    memory_stats = psutil.virtual_memory()
    return memory_stats.total
Add utils 2023-02-09 11:26:50 +00:00			`import enum`
Support tensor parallel (#2) 2023-03-22 04:45:42 +08:00
Refactor system architecture (#82) 2023-05-09 15:30:12 -07:00			`import psutil`
Support tensor parallel (#2) 2023-03-22 04:45:42 +08:00			`import torch`

Add utils 2023-02-09 11:26:50 +00:00
			`class Device(enum.Enum):`
			`GPU = enum.auto()`
			`CPU = enum.auto()`


			`class Counter:`

			`def __init__(self, start: int = 0) -> None:`
			`self.counter = start`

Fix typo 2023-02-14 01:19:27 +00:00			`def __next__(self) -> int:`
Add utils 2023-02-09 11:26:50 +00:00			`id = self.counter`
			`self.counter += 1`
			`return id`

			`def reset(self) -> None:`
			`self.counter = 0`
Support tensor parallel (#2) 2023-03-22 04:45:42 +08:00
FastAPI-based working frontend (#10) 2023-03-29 14:48:56 +08:00
			`def get_gpu_memory(gpu: int = 0) -> int:`
Print warnings/errors for large swap space (#123) 2023-05-23 18:22:26 -07:00			`"""Returns the total memory of the GPU in bytes."""`
FastAPI-based working frontend (#10) 2023-03-29 14:48:56 +08:00			`return torch.cuda.get_device_properties(gpu).total_memory`


			`def get_cpu_memory() -> int:`
Print warnings/errors for large swap space (#123) 2023-05-23 18:22:26 -07:00			`"""Returns the total CPU memory of the node in bytes."""`
FastAPI-based working frontend (#10) 2023-03-29 14:48:56 +08:00			`return psutil.virtual_memory().total`