vllm/find_cuda_init.py

import importlib
import traceback
from typing import Callable
from unittest.mock import patch


def find_cuda_init(fn: Callable[[], object]) -> None:
    """
    Helper function to debug CUDA re-initialization errors.

    Runs `fn` with `torch.cuda._lazy_init` patched. If `fn` triggers CUDA
    initialization, prints the stack trace of the first call that did so.

    The trace is printed even when `fn` raises; the exception raised by `fn`
    (or by the initialization attempt itself) then propagates to the caller.

    Args:
        fn: Zero-argument callable to run under observation. Its return
            value is ignored.
    """
    from torch.cuda import _lazy_init, is_initialized

    stack = None

    def wrapper():
        nonlocal stack
        # `_lazy_init` may be called many times while `fn` runs. Only the
        # first call made while CUDA is still uninitialized is the one that
        # initializes it, so keep that stack and never overwrite it with
        # later call sites.
        if stack is None and not is_initialized():
            stack = traceback.extract_stack()
        return _lazy_init()

    try:
        with patch("torch.cuda._lazy_init", wrapper):
            fn()
    finally:
        # Report in `finally` so the trace is not lost when `fn` fails.
        if stack is not None:
            print("==== CUDA Initialized ====")
            print("".join(traceback.format_list(stack)).strip())
            print("==========================")


if __name__ == "__main__":
    # Example usage: report where importing the LLaVA model module
    # initializes CUDA, if it does.
    target_module = "vllm.model_executor.models.llava"
    find_cuda_init(lambda: importlib.import_module(target_module))
[Core][VLM] Test registration for OOT multimodal models (#8717) Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> 2024-10-04 10:38:25 -07:00			`import importlib`
			`import traceback`
			`from typing import Callable`
			`from unittest.mock import patch`


			`def find_cuda_init(fn: Callable[[], object]) -> None:`
			`"""`
			`Helper function to debug CUDA re-initialization errors.`

			If `fn` initializes CUDA, prints the stack trace of how this happens.
			`"""`
			`from torch.cuda import _lazy_init`

			`stack = None`

			`def wrapper():`
			`nonlocal stack`
			`stack = traceback.extract_stack()`
			`return _lazy_init()`

			`with patch("torch.cuda._lazy_init", wrapper):`
			`fn()`

			`if stack is not None:`
			`print("==== CUDA Initialized ====")`
			`print("".join(traceback.format_list(stack)).strip())`
			`print("==========================")`


			`if __name__ == "__main__":`
			`find_cuda_init(`
			`lambda: importlib.import_module("vllm.model_executor.models.llava"))`