vllm/tests/neuron/test_activation.py
Liangfu Chen f75aa72732
[Neuron] Add custom_ops for neuron backend (#13246)
Signed-off-by: Liangfu Chen <liangfc@amazon.com>
Co-authored-by: George Novack <gnovack@amazon.com>
Co-authored-by: Aoyu Zhang <aoyuzhan@amazon.com>
2025-02-25 11:47:49 -08:00

# SPDX-License-Identifier: Apache-2.0
import pytest
import torch
import torch.nn.functional as F

from vllm.model_executor.layers.activation import FastGELU, SiluAndMul
from vllm.platforms import current_platform


@pytest.mark.parametrize("activation", ["silu_and_mul", "gelu_fast"])
@pytest.mark.parametrize("num_tokens,d,dtype", [
    (7, 512, torch.half),
    (7, 512, torch.float),
    (83, 512, torch.half),
])
@torch.inference_mode()
def test_act_and_mul(
    activation: str,
    num_tokens: int,
    d: int,
    dtype: torch.dtype,
) -> None:
    """Compare the Neuron (XLA) custom op against a PyTorch-native reference."""
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()

    current_platform.seed_everything(0)
    torch.set_default_device("cpu")

    # Build the input on CPU, then move it onto the Neuron (XLA) device.
    x = torch.randn(num_tokens, 2 * d, dtype=dtype).to(device=device)

    if activation == "silu_and_mul":
        layer = SiluAndMul()
        fn = layer.forward_native
    elif activation == "gelu_fast":
        layer = FastGELU()
        # F.gelu is the exact (erf-based) GELU; the absolute tolerance below
        # absorbs the small gap to FastGELU's tanh approximation.
        fn = F.gelu
    else:
        raise NotImplementedError(
            f"activation {activation} is not implemented.")

    assert x.is_xla, "input tensor under test is expected to be an XLA tensor."

    # Run the Neuron custom op on device and the native reference on CPU,
    # then compare the results on CPU.
    out = layer.to(device=device).forward_neuron(x)
    ref_out = fn(x.cpu())
    torch.testing.assert_close(out.cpu(), ref_out, atol=0.01, rtol=0.0)
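
For context, the PyTorch-native reference that the silu_and_mul case compares against reduces to a split-and-gate over the last dimension, while the gelu_fast case is checked against PyTorch's exact F.gelu. The sketch below is a standalone illustration of that reference math only; the helper name silu_and_mul_reference and the shapes are illustrative and not part of vLLM or of the test above.

import torch
import torch.nn.functional as F

def silu_and_mul_reference(x: torch.Tensor) -> torch.Tensor:
    # Split the last dimension in half and gate: silu(a) * b.
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]

x = torch.randn(7, 2 * 512)                 # (num_tokens, 2 * d), as in the test
ref = silu_and_mul_reference(x)             # shape (7, 512)
gelu_ref = F.gelu(torch.randn(7, 2 * 512))  # exact erf-based GELU reference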