Specify python package dependencies in requirements.txt (#78)

This commit is contained in:
Woosuk Kwon 2023-05-07 16:30:43 -07:00 committed by GitHub
parent c84e924287
commit 7addca5935
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 36 deletions

View File

@ -1,10 +1,10 @@
# CacheFlow # CacheFlow
## Installation ## Build from source
```bash ```bash
pip install ninja psutil numpy sentencepiece ray torch transformers xformers pip install -r requirements.txt
pip install -e . pip install -e . # This may take several minutes.
``` ```
## Test simple server ## Test simple server
@ -21,11 +21,6 @@ python simple_server.py --help
## FastAPI server ## FastAPI server
Install the following additional dependencies:
```bash
pip install fastapi uvicorn
```
To start the server: To start the server:
```bash ```bash
ray start --head ray start --head

10
requirements.txt Normal file
View File

@ -0,0 +1,10 @@
ninja # For faster builds.
psutil
ray
sentencepiece # Required for LLaMA tokenizer.
numpy
torch >= 2.0.0
transformers >= 4.28.0 # Required for LLaMA.
xformers >= 0.0.19
fastapi
uvicorn

View File

@ -1,14 +1,20 @@
from typing import List
import setuptools import setuptools
import torch import torch
from torch.utils import cpp_extension from torch.utils.cpp_extension import BuildExtension, CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME
CXX_FLAGS = ['-g']
NVCC_FLAGS = ['-O2'] # Build custom operators.
CXX_FLAGS = ["-g"]
# TODO(woosuk): Should we use -O3?
NVCC_FLAGS = ["-O2"]
if not torch.cuda.is_available(): if not torch.cuda.is_available():
raise RuntimeError( raise RuntimeError(
f'Cannot find CUDA at CUDA_HOME: {cpp_extension.CUDA_HOME}. ' f"Cannot find CUDA at CUDA_HOME: {CUDA_HOME}. "
'CUDA must be available in order to build the package.') "CUDA must be available in order to build the package.")
# FIXME(woosuk): Consider the case where the machine has multiple GPUs with # FIXME(woosuk): Consider the case where the machine has multiple GPUs with
# different compute capabilities. # different compute capabilities.
@ -16,52 +22,62 @@ compute_capability = torch.cuda.get_device_capability()
major, minor = compute_capability major, minor = compute_capability
# Enable bfloat16 support if the compute capability is >= 8.0. # Enable bfloat16 support if the compute capability is >= 8.0.
if major >= 8: if major >= 8:
NVCC_FLAGS.append('-DENABLE_BF16') NVCC_FLAGS.append("-DENABLE_BF16")
ext_modules = [] ext_modules = []
# Cache operations. # Cache operations.
cache_extension = cpp_extension.CUDAExtension( cache_extension = CUDAExtension(
name='cacheflow.cache_ops', name="cacheflow.cache_ops",
sources=['csrc/cache.cpp', 'csrc/cache_kernels.cu'], sources=["csrc/cache.cpp", "csrc/cache_kernels.cu"],
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS}, extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
) )
ext_modules.append(cache_extension) ext_modules.append(cache_extension)
# Attention kernels. # Attention kernels.
attention_extension = cpp_extension.CUDAExtension( attention_extension = CUDAExtension(
name='cacheflow.attention_ops', name="cacheflow.attention_ops",
sources=['csrc/attention.cpp', 'csrc/attention/attention_kernels.cu'], sources=["csrc/attention.cpp", "csrc/attention/attention_kernels.cu"],
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS}, extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
) )
ext_modules.append(attention_extension) ext_modules.append(attention_extension)
# Positional encoding kernels. # Positional encoding kernels.
positional_encoding_extension = cpp_extension.CUDAExtension( positional_encoding_extension = CUDAExtension(
name='cacheflow.pos_encoding_ops', name="cacheflow.pos_encoding_ops",
sources=['csrc/pos_encoding.cpp', 'csrc/pos_encoding_kernels.cu'], sources=["csrc/pos_encoding.cpp", "csrc/pos_encoding_kernels.cu"],
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS}, extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
) )
ext_modules.append(positional_encoding_extension) ext_modules.append(positional_encoding_extension)
# Layer normalization kernels. # Layer normalization kernels.
layernorm_extension = cpp_extension.CUDAExtension( layernorm_extension = CUDAExtension(
name='cacheflow.layernorm_ops', name="cacheflow.layernorm_ops",
sources=['csrc/layernorm.cpp', 'csrc/layernorm_kernels.cu'], sources=["csrc/layernorm.cpp", "csrc/layernorm_kernels.cu"],
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS}, extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
) )
ext_modules.append(layernorm_extension) ext_modules.append(layernorm_extension)
# Activation kernels. # Activation kernels.
activation_extension = cpp_extension.CUDAExtension( activation_extension = CUDAExtension(
name='cacheflow.activation_ops', name="cacheflow.activation_ops",
sources=['csrc/activation.cpp', 'csrc/activation_kernels.cu'], sources=["csrc/activation.cpp", "csrc/activation_kernels.cu"],
extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS}, extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
) )
ext_modules.append(activation_extension) ext_modules.append(activation_extension)
def get_requirements() -> List[str]:
    """Get Python package dependencies from requirements.txt.

    Reads ``requirements.txt`` (resolved against the current working
    directory) and returns one requirement specifier per entry, in file
    order, suitable for passing to ``install_requires``.

    Blank lines, comment-only lines, and trailing ``# ...`` comments are
    dropped, so every returned entry is a bare requirement specifier and an
    empty file yields an empty list rather than ``[""]``.

    Returns:
        The requirement specifiers listed in requirements.txt.

    Raises:
        OSError: If requirements.txt cannot be opened.
    """
    import re  # Local import: only this helper needs it.

    # A comment starts at a "#" that begins the line or follows whitespace
    # (the requirements-file rule), so a "#" inside a URL fragment such as
    # "#egg=name" is kept.
    comment = re.compile(r"(^|\s+)#.*$")

    with open("requirements.txt", encoding="utf-8") as f:
        stripped = (comment.sub("", line).strip() for line in f)
        return [req for req in stripped if req]
setuptools.setup( setuptools.setup(
name='cacheflow', name="cacheflow",
python_requires=">=3.8",
install_requires=get_requirements(),
ext_modules=ext_modules, ext_modules=ext_modules,
cmdclass={'build_ext': cpp_extension.BuildExtension}, cmdclass={"build_ext": BuildExtension},
) )