vllm/tests/neuron/test_activation.py

# SPDX-License-Identifier: Apache-2.0

import pytest
import torch
import torch.nn.functional as F

from vllm.model_executor.layers.activation import FastGELU, SiluAndMul
from vllm.platforms import current_platform


@pytest.mark.parametrize("activation", ["silu_and_mul", "gelu_fast"])
@pytest.mark.parametrize("num_tokens,d,dtype", [
    (7, 512, torch.half),
    (7, 512, torch.float),
    (83, 512, torch.half),
])
@torch.inference_mode()
def test_act_and_mul(
    activation: str,
    num_tokens: int,
    d: int,
    dtype: torch.dtype,
) -> None:
    import torch_xla.core.xla_model as xm

    device = xm.xla_device()
    current_platform.seed_everything(0)
    torch.set_default_device("cpu")

    # Build the input on CPU, then move it to the XLA (Neuron) device.
    x = torch.randn(num_tokens, 2 * d, dtype=dtype).to(device=device)

    # Select the layer under test and a CPU reference function.
    if activation == "silu_and_mul":
        layer = SiluAndMul()
        fn = layer.forward_native
    elif activation == "gelu_fast":
        layer = FastGELU()
        # F.gelu (exact, erf-based) serves as the reference; the tanh
        # approximation computed by FastGELU stays within the atol below.
        fn = F.gelu
    else:
        raise NotImplementedError(
            f"activation {activation} is not implemented.")

    assert x.is_xla, "input tensor under testing is expected to be XLA tensor."

    # Run the Neuron path on device, the reference on CPU, then compare.
    out = layer.to(device=device).forward_neuron(x)
    ref_out = fn(x.cpu())
    torch.testing.assert_close(out.cpu(), ref_out, atol=0.01, rtol=0.0)