diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8d4aaed..217dc70c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -233,7 +233,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
     # Generate sources:
+    # execute_process() runs without a shell, so the caller's PYTHONPATH is read
+    # via $ENV{...}; a bare $PYTHONPATH would be passed through as literal text.
     execute_process(
       COMMAND ${CMAKE_COMMAND} -E env
-      PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/:$PYTHONPATH
+      PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/:${VLLM_PYTHON_PATH}:$ENV{PYTHONPATH}
       ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/csrc/quantization/machete/generate.py
       RESULT_VARIABLE machete_generation_result
       OUTPUT_VARIABLE machete_generation_output
diff --git a/setup.py b/setup.py
index ef599b61..21b0422c 100644
--- a/setup.py
+++ b/setup.py
@@ -184,6 +184,11 @@ class cmake_build_ext(build_ext):
         # match.
         cmake_args += ['-DVLLM_PYTHON_EXECUTABLE={}'.format(sys.executable)]
 
+        # Forward this interpreter's sys.path to cmake as VLLM_PYTHON_PATH
+        # (':'-joined) so that python processes launched by cmake can reuse
+        # the build dependencies.
+        cmake_args += ['-DVLLM_PYTHON_PATH={}'.format(":".join(sys.path))]
+
         #
         # Setup parallelism and build tool
         #