Clean up remaining Punica C information (#7027)
This commit is contained in:
parent
16a1cc9bb2
commit
f80ab3521c
6
.github/workflows/clang-format.yml
vendored
6
.github/workflows/clang-format.yml
vendored
@ -30,12 +30,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
EXCLUDES=(
|
EXCLUDES=(
|
||||||
'csrc/moe/topk_softmax_kernels.cu'
|
'csrc/moe/topk_softmax_kernels.cu'
|
||||||
'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
|
|
||||||
'csrc/punica/bgmv/bgmv_config.h'
|
|
||||||
'csrc/punica/bgmv/bgmv_impl.cuh'
|
|
||||||
'csrc/punica/bgmv/vec_dtypes.cuh'
|
|
||||||
'csrc/punica/punica_ops.cu'
|
|
||||||
'csrc/punica/type_convert.h'
|
|
||||||
)
|
)
|
||||||
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
|
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
|
||||||
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
|
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
|
||||||
|
@ -181,7 +181,7 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
|
|||||||
#
|
#
|
||||||
# The torch cmake setup hardcodes the detected architecture flags in
|
# The torch cmake setup hardcodes the detected architecture flags in
|
||||||
# `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
|
# `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
|
||||||
# can't modified on a per-target basis, e.g. for the `punica` extension.
|
# can't be modified on a per-target basis.
|
||||||
# So, all the `-gencode` flags need to be extracted and removed from
|
# So, all the `-gencode` flags need to be extracted and removed from
|
||||||
# `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
|
# `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
|
||||||
# Since it's not possible to use `target_compile_options` for adding target
|
# Since it's not possible to use `target_compile_options` for adding target
|
||||||
|
@ -242,12 +242,6 @@ echo 'vLLM isort: Done'
|
|||||||
# NOTE: Keep up to date with .github/workflows/clang-format.yml
|
# NOTE: Keep up to date with .github/workflows/clang-format.yml
|
||||||
CLANG_FORMAT_EXCLUDES=(
|
CLANG_FORMAT_EXCLUDES=(
|
||||||
'csrc/moe/topk_softmax_kernels.cu'
|
'csrc/moe/topk_softmax_kernels.cu'
|
||||||
'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
|
|
||||||
'csrc/punica/bgmv/bgmv_config.h'
|
|
||||||
'csrc/punica/bgmv/bgmv_impl.cuh'
|
|
||||||
'csrc/punica/bgmv/vec_dtypes.cuh'
|
|
||||||
'csrc/punica/punica_ops.cu'
|
|
||||||
'csrc/punica/type_convert.h'
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Format specified files with clang-format
|
# Format specified files with clang-format
|
||||||
|
@ -1304,7 +1304,7 @@ class LoRAConfig:
|
|||||||
long_lora_scaling_factors: Optional[Tuple[float]] = None
|
long_lora_scaling_factors: Optional[Tuple[float]] = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
# Keep this in sync with csrc/punica/bgmv/bgmv_config.h
|
# TODO: Increase the range of rank
|
||||||
possible_max_ranks = (8, 16, 32, 64)
|
possible_max_ranks = (8, 16, 32, 64)
|
||||||
possible_lora_extra_vocab_size = (0, 256, 512)
|
possible_lora_extra_vocab_size = (0, 256, 512)
|
||||||
if self.max_lora_rank not in possible_max_ranks:
|
if self.max_lora_rank not in possible_max_ranks:
|
||||||
|
@ -1073,7 +1073,7 @@ class LogitsProcessorWithLoRA(BaseLayerWithLoRA):
|
|||||||
lora_config: LoRAConfig,
|
lora_config: LoRAConfig,
|
||||||
model_config: Optional[PretrainedConfig] = None,
|
model_config: Optional[PretrainedConfig] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
# Keep this in sync with csrc/punica/bgmv/bgmv_config.h
|
# TODO: Verify if this condition can be relaxed
|
||||||
if 32000 < self.base_layer.vocab_size > 128512:
|
if 32000 < self.base_layer.vocab_size > 128512:
|
||||||
raise ValueError("When using LoRA, vocab size must be "
|
raise ValueError("When using LoRA, vocab size must be "
|
||||||
"32000 >= vocab_size <= 128512")
|
"32000 >= vocab_size <= 128512")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user