// vllm/csrc/quantization.cpp

#include <torch/extension.h>

// Forward declaration of the AWQ quantized GEMM entry point. Only the
// signature appears here (the definition is not visible in this file); the
// function is registered with the Python module under the name "awq_gemm".
//
// Parameter notes are inferred from the names alone — TODO confirm each one
// against the definition:
//   _in_feats         presumably the input activations
//   _kernel           presumably the quantized weight tensor
//   _scaling_factors  presumably the dequantization scales
//   _zeros            presumably the dequantization zero points
//   split_k_iters     presumably how many pieces the reduction dimension is
//                     split into
//
// Returns a torch::Tensor (presumably the GEMM result — confirm).
torch::Tensor awq_gemm(
  torch::Tensor _in_feats,
  torch::Tensor _kernel,
  torch::Tensor _scaling_factors,
  torch::Tensor _zeros,
  int split_k_iters);

// Forward declaration of the SqueezeLLM quantized GEMM entry point. Only the
// signature appears here (the definition is not visible in this file); the
// function is registered with the Python module under the name
// "squeezellm_gemm".
//
// The function returns void, so the result is presumably written into one of
// the tensor arguments — NOTE(review): confirm which one against the
// definition.
//
// Parameter notes are inferred from the names alone — TODO confirm:
//   vec           presumably the input vector / activations
//   mat           presumably the quantized weight matrix
//   mul           presumably the product (output) tensor
//   lookup_table  presumably the table mapping quantized codes to values
void squeezellm_gemm(
  torch::Tensor vec,
  torch::Tensor mat,
  torch::Tensor mul,
  torch::Tensor lookup_table);

// Python extension module definition. TORCH_EXTENSION_NAME is not defined in
// this file and is expected to come from the build; `m` is the module object
// on which each kernel is registered as (python name, function, docstring).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // AWQ kernel binding.
  m.def(
    "awq_gemm",
    &awq_gemm,
    "Quantized GEMM for AWQ");

  // SqueezeLLM kernel binding.
  m.def(
    "squeezellm_gemm",
    &squeezellm_gemm,
    "Quantized GEMM for SqueezeLLM");
}
// Change history (recovered from blame annotations pasted after the code;
// the annotated lines duplicated the declarations and bindings above):
//
//   2023-09-16 00:03:37 -07:00
//     Implement AWQ quantization support for LLaMA (#1032)
//     Co-authored-by: Robert Irvine <robert@seamlessml.com>
//     Co-authored-by: root <rirv938@gmail.com>
//     Co-authored-by: Casper <casperbh.96@gmail.com>
//     Co-authored-by: julian-q <julianhquevedo@gmail.com>
//     Lines: the torch/extension.h include, the awq_gemm declaration, and the
//     opening and closing lines of the PYBIND11_MODULE block.
//
//   2023-10-22 03:14:59 -03:00
//     Support SqueezeLLM (#1326)
//     Co-authored-by: squeeze-ai-lab <squeezeailab.bair@gmail.com>
//     Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
//     Lines: the squeezellm_gemm declaration and both m.def(...) registrations.