From 7addca5935c83806429d7ec557999a505e6f6a35 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Sun, 7 May 2023 16:30:43 -0700
Subject: [PATCH] Specify python package dependencies in requirements.txt (#78)

---
 README.md        | 11 ++------
 requirements.txt | 10 +++++++
 setup.py         | 72 +++++++++++++++++++++++++++++-------------------
 3 files changed, 57 insertions(+), 36 deletions(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index 0543b9de..b70cde90 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
 # CacheFlow
 
-## Installation
+## Build from source
 
 ```bash
-pip install ninja psutil numpy sentencepiece ray torch transformers xformers
-pip install -e .
+pip install -r requirements.txt
+pip install -e .  # This may take several minutes.
 ```
 
 ## Test simple server
@@ -21,11 +21,6 @@ python simple_server.py --help
 
 ## FastAPI server
 
-Install the following additional dependencies:
-```bash
-pip install fastapi uvicorn
-```
-
 To start the server:
 ```bash
 ray start --head
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..bcb79da5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+ninja  # For faster builds.
+psutil
+ray
+sentencepiece  # Required for LLaMA tokenizer.
+numpy
+torch >= 2.0.0
+transformers >= 4.28.0  # Required for LLaMA.
+xformers >= 0.0.19
+fastapi
+uvicorn
diff --git a/setup.py b/setup.py
index 52ff89f6..48538ffe 100644
--- a/setup.py
+++ b/setup.py
@@ -1,14 +1,20 @@
+from typing import List
+
 import setuptools
 import torch
-from torch.utils import cpp_extension
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from torch.utils.cpp_extension import CUDA_HOME
 
-CXX_FLAGS = ['-g']
-NVCC_FLAGS = ['-O2']
+
+# Build custom operators.
+CXX_FLAGS = ["-g"]
+# TODO(woosuk): Should we use -O3?
+NVCC_FLAGS = ["-O2"]
 
 if not torch.cuda.is_available():
     raise RuntimeError(
-        f'Cannot find CUDA at CUDA_HOME: {cpp_extension.CUDA_HOME}. '
-        'CUDA must be available in order to build the package.')
+        f"Cannot find CUDA at CUDA_HOME: {CUDA_HOME}. "
+        "CUDA must be available in order to build the package.")
 
 # FIXME(woosuk): Consider the case where the machine has multiple GPUs with
 # different compute capabilities.
@@ -16,52 +22,62 @@ compute_capability = torch.cuda.get_device_capability()
 major, minor = compute_capability
 # Enable bfloat16 support if the compute capability is >= 8.0.
 if major >= 8:
-    NVCC_FLAGS.append('-DENABLE_BF16')
+    NVCC_FLAGS.append("-DENABLE_BF16")
 
 ext_modules = []
 
 # Cache operations.
-cache_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.cache_ops',
-    sources=['csrc/cache.cpp', 'csrc/cache_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+cache_extension = CUDAExtension(
+    name="cacheflow.cache_ops",
+    sources=["csrc/cache.cpp", "csrc/cache_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(cache_extension)
 
 # Attention kernels.
-attention_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.attention_ops',
-    sources=['csrc/attention.cpp', 'csrc/attention/attention_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+attention_extension = CUDAExtension(
+    name="cacheflow.attention_ops",
+    sources=["csrc/attention.cpp", "csrc/attention/attention_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(attention_extension)
 
 # Positional encoding kernels.
-positional_encoding_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.pos_encoding_ops',
-    sources=['csrc/pos_encoding.cpp', 'csrc/pos_encoding_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+positional_encoding_extension = CUDAExtension(
+    name="cacheflow.pos_encoding_ops",
+    sources=["csrc/pos_encoding.cpp", "csrc/pos_encoding_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(positional_encoding_extension)
 
 # Layer normalization kernels.
-layernorm_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.layernorm_ops',
-    sources=['csrc/layernorm.cpp', 'csrc/layernorm_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+layernorm_extension = CUDAExtension(
+    name="cacheflow.layernorm_ops",
+    sources=["csrc/layernorm.cpp", "csrc/layernorm_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(layernorm_extension)
 
 # Activation kernels.
-activation_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.activation_ops',
-    sources=['csrc/activation.cpp', 'csrc/activation_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+activation_extension = CUDAExtension(
+    name="cacheflow.activation_ops",
+    sources=["csrc/activation.cpp", "csrc/activation_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(activation_extension)
 
+
+def get_requirements() -> List[str]:
+    """Get Python package dependencies from requirements.txt."""
+    with open("requirements.txt") as f:
+        requirements = f.read().strip().split("\n")
+    return requirements
+
+
 setuptools.setup(
-    name='cacheflow',
+    name="cacheflow",
+    python_requires=">=3.8",
+    install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={'build_ext': cpp_extension.BuildExtension},
+    cmdclass={"build_ext": BuildExtension},
 )
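
A note on the new `get_requirements()` helper: it returns every line of requirements.txt verbatim, so entries such as `ninja  # For faster builds.` carry their inline comments into `install_requires`. Comment stripping in requirements files is a pip feature, and PEP 508 requirement strings have no comment syntax, so stricter setuptools versions may warn about or reject such entries. A more defensive variant might strip comments and blank lines; the sketch below is illustrative only, not part of the patch:

```python
# Illustrative sketch only; not part of the patch above.
from typing import List


def get_requirements() -> List[str]:
    """Read dependencies from requirements.txt, skipping comments and blanks."""
    requirements: List[str] = []
    with open("requirements.txt") as f:
        for line in f:
            # "#" comments are understood by pip but not by setuptools'
            # install_requires, so drop them before returning the entries.
            line = line.split("#", 1)[0].strip()
            if line:
                requirements.append(line)
    return requirements
```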