[Misc] Restrict ray version dependency and update PP feature warning in V1 (#15556)
This commit is contained in:
parent
619d3de8bd
commit
df8d3d1287
@ -4,7 +4,7 @@
|
|||||||
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
||||||
|
|
||||||
# Dependencies for NVIDIA GPUs
|
# Dependencies for NVIDIA GPUs
|
||||||
ray[cgraph]>=2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
|
||||||
torch==2.6.0
|
torch==2.6.0
|
||||||
torchaudio==2.6.0
|
torchaudio==2.6.0
|
||||||
# These must be updated alongside torch
|
# These must be updated alongside torch
|
||||||
|
@ -17,7 +17,7 @@ vector_quantize_pytorch # required for minicpmo_26 test
|
|||||||
vocos # required for minicpmo_26 test
|
vocos # required for minicpmo_26 test
|
||||||
peft
|
peft
|
||||||
pqdm
|
pqdm
|
||||||
ray[cgraph]>=2.43.0 # Ray Compiled Graph, required by pipeline parallelism tests
|
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required by pipeline parallelism tests
|
||||||
sentence-transformers # required for embedding tests
|
sentence-transformers # required for embedding tests
|
||||||
soundfile # required for audio tests
|
soundfile # required for audio tests
|
||||||
jiwer # required for audio tests
|
jiwer # required for audio tests
|
||||||
|
@ -313,7 +313,7 @@ class ModelConfig:
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
|
"VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
|
||||||
"module was not found."
|
"module was not found."
|
||||||
"See https://github.com/vllm-project/vllm/blob/main/Dockerfile"
|
"See https://github.com/vllm-project/vllm/blob/main/Dockerfile "
|
||||||
"for instructions on how to install it.")
|
"for instructions on how to install it.")
|
||||||
|
|
||||||
# The tokenizer version is consistent with the model version by default.
|
# The tokenizer version is consistent with the model version by default.
|
||||||
|
@ -1686,8 +1686,11 @@ class EngineArgs:
|
|||||||
if self.enable_lora and _warn_or_fallback("LORA"):
|
if self.enable_lora and _warn_or_fallback("LORA"):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# PP is supported on V1, but off by default for now.
|
# PP is supported on V1 with Ray distributed executor,
|
||||||
if self.pipeline_parallel_size > 1 and _warn_or_fallback("PP"):
|
# but off for MP distributed executor for now.
|
||||||
|
if (self.pipeline_parallel_size > 1
|
||||||
|
and self.distributed_executor_backend == "mp"
|
||||||
|
and _warn_or_fallback("PP (MP distributed executor)")):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# ngram is supported on V1, but off by default for now.
|
# ngram is supported on V1, but off by default for now.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user