2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
2024-11-08 16:20:08 -05:00
|
|
|
from copy import deepcopy
|
2024-11-21 00:44:57 -05:00
|
|
|
from typing import Callable, Union
|
2024-11-08 16:20:08 -05:00
|
|
|
|
2024-11-21 00:44:57 -05:00
|
|
|
from torch import fx
|
|
|
|
|
|
|
|
from vllm.compilation.inductor_pass import InductorPass
|
2025-03-14 16:58:30 -04:00
|
|
|
from vllm.config import get_current_vllm_config
|
2024-11-08 16:20:08 -05:00
|
|
|
|
|
|
|
|
|
|
|
class TestBackend:
    """
    A simple Inductor backend that can be used for testing.

    It takes a list of custom passes and runs them after Inductor's passes
    (they are registered as Inductor's 'post_grad_custom_post_pass').
    It also saves the graph before and after the custom passes for inspection:

    - ``graph_pre_compile``: deep copy of the module handed to the backend,
      taken before Inductor compilation starts (set by ``__call__``).
    - ``graph_pre_pass``: deep copy of the graph before the custom passes.
    - ``graph_post_pass``: deep copy of the graph after the custom passes.
    - ``final_graph``: the graph object itself (not a copy).

    These attributes only exist once the corresponding step has run.

    Inductor config can be modified directly by editing the inductor_config
    property. This can be helpful for adding passes like the
    'pre_grad_custom_pass' and the 'post_grad_custom_pre_pass'.
    Inductor config is default-initialized from VllmConfig.CompilationConfig.
    """

    def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
                                                             None]]):
        """
        Build the backend from the custom passes to run, in order.

        :param passes: callables invoked as ``pass_(graph)`` on the graph
            given to ``post_pass``.
        """
        self.custom_passes = list(passes)
        compile_config = get_current_vllm_config().compilation_config
        # Take a shallow copy: the overrides below (one of them a bound method
        # of this instance) must not be written back into the shared vLLM
        # compilation config, and later edits to self.inductor_config must
        # stay local to this backend.
        self.inductor_config = dict(compile_config.inductor_compile_config)
        self.inductor_config['force_disable_caches'] = True
        self.inductor_config['post_grad_custom_post_pass'] = self.post_pass

    def __call__(self, graph: fx.GraphModule, example_inputs):
        """
        Compile ``graph`` with Inductor using this backend's config patches.

        A deep copy of the incoming module is stored in ``graph_pre_compile``
        before compilation starts.

        :param graph: the module to compile.
        :param example_inputs: example inputs forwarded to ``compile_fx``.
        :return: whatever ``compile_fx`` returns.
        """
        self.graph_pre_compile = deepcopy(graph)
        # Imported lazily so that importing this module does not pull in
        # the Inductor compile machinery.
        from torch._inductor.compile_fx import compile_fx
        return compile_fx(graph,
                          example_inputs,
                          config_patches=self.inductor_config)

    def post_pass(self, graph: fx.Graph):
        """
        Run the custom passes on ``graph``, snapshotting it before and after.

        :param graph: the graph to transform; each custom pass is called
            with this same object, in registration order.
        """
        self.graph_pre_pass = deepcopy(graph)
        for pass_ in self.custom_passes:
            pass_(graph)

        self.graph_post_pass = deepcopy(graph)
        # assign by reference, will reflect the final state of the graph
        self.final_graph = graph
|