# vllm/tests/compile/backend.py

from copy import deepcopy
from typing import Callable, Union

from torch import fx

from vllm.compilation.inductor_pass import InductorPass


class TestBackend:
    """
    Minimal Inductor backend meant to be used from tests.

    The custom passes handed to the constructor are registered through
    Inductor's ``post_grad_custom_post_pass`` config hook, so they run
    after Inductor's own passes. Deep-copied snapshots of the graph taken
    right before and right after the custom passes are stored on the
    instance (``graph_pre_pass`` / ``graph_post_pass``) for inspection.
    """

    def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
                                                             None]]):
        """
        Store the custom passes and build the Inductor config patches.

        :param passes: callables applied in order to the ``fx.Graph``
            received by ``post_pass``.
        """
        self.custom_passes = list(passes)

        # Local import: torch._inductor is only needed once a backend is
        # actually instantiated.
        from torch._inductor import config as inductor_config

        patches = inductor_config.shallow_copy_dict()
        patches['force_disable_caches'] = True
        # Hook this instance in as the post-grad custom pass.
        patches['post_grad_custom_post_pass'] = self.post_pass
        self.current_config = patches

    def __call__(self, graph: fx.GraphModule, example_inputs):
        """
        Compile ``graph`` via ``compile_fx`` using the stored config patches.

        :param graph: the graph module handed to the backend.
        :param example_inputs: example inputs forwarded to ``compile_fx``.
        :return: whatever ``compile_fx`` returns.
        """
        from torch._inductor.compile_fx import compile_fx

        compiled = compile_fx(graph,
                              example_inputs,
                              config_patches=self.current_config)
        return compiled

    def post_pass(self, graph: fx.Graph):
        """
        Apply every custom pass to ``graph``, snapshotting before and after.

        :param graph: the graph to transform; it is modified in place by
            the custom passes.
        """
        # Snapshot taken before any custom pass touches the graph.
        self.graph_pre_pass = deepcopy(graph)

        for custom_pass in self.custom_passes:
            custom_pass(graph)

        # Snapshot of the state right after the last custom pass.
        self.graph_post_pass = deepcopy(graph)

        # Kept by reference (not copied): later mutations of the graph
        # remain visible through this attribute.
        self.final_graph = graph
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00			`from copy import deepcopy`
[torch.compile] Inductor code caching fix (#10273) Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> 2024-11-21 00:44:57 -05:00			`from typing import Callable, Union`
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00
[torch.compile] Inductor code caching fix (#10273) Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> 2024-11-21 00:44:57 -05:00			`from torch import fx`

			`from vllm.compilation.inductor_pass import InductorPass`
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00

			`class TestBackend:`
			`"""`
			`This class provides a simple Inductor backend that can be used for testing.`
			`It takes a list of custom passes and runs them after Inductor's passes.`
			`It also saves the graph before and after the custom passes for inspection.`
			`"""`

[torch.compile] Inductor code caching fix (#10273) Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> 2024-11-21 00:44:57 -05:00			`def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],`
			`None]]):`
			`self.custom_passes = list(passes)`
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00			`from torch._inductor import config`
			`self.current_config = config.shallow_copy_dict()`
[torch.compile] Inductor code caching fix (#10273) Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> 2024-11-21 00:44:57 -05:00			`self.current_config['force_disable_caches'] = True`
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00			`self.current_config['post_grad_custom_post_pass'] = self.post_pass`

[torch.compile] Inductor code caching fix (#10273) Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> 2024-11-21 00:44:57 -05:00			`def __call__(self, graph: fx.GraphModule, example_inputs):`
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00			`from torch._inductor.compile_fx import compile_fx`
			`return compile_fx(graph,`
			`example_inputs,`
			`config_patches=self.current_config)`

[torch.compile] Inductor code caching fix (#10273) Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> 2024-11-21 00:44:57 -05:00			`def post_pass(self, graph: fx.Graph):`
[torch.compile] Fuse RMSNorm with quant (#9138) Signed-off-by: luka <luka@neuralmagic.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-11-08 16:20:08 -05:00			`self.graph_pre_pass = deepcopy(graph)`
			`for pass_ in self.custom_passes:`
			`pass_(graph)`

			`self.graph_post_pass = deepcopy(graph)`
			`# assign by reference, will reflect the final state of the graph`
			`self.final_graph = graph`