[Bugfix][Kernel][CPU] Fix num_tokens in CPU rotary embedding kernel (#14667)

Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg>
Author: Thien Tran, 2025-03-14 14:47:49 +08:00, committed by GitHub
Commit: 27b50f1fe6 (parent: 9532c49836)


@@ -170,7 +170,7 @@ void rotary_embedding_gptj_impl(
 void rotary_embedding(torch::Tensor& positions, torch::Tensor& query,
                       torch::Tensor& key, int64_t head_size,
                       torch::Tensor& cos_sin_cache, bool is_neox) {
-  int num_tokens = query.numel() / query.size(-1);
+  int num_tokens = positions.numel();
   int rot_dim = cos_sin_cache.size(1);
   int num_heads = query.size(-1) / head_size;
   int num_kv_heads = key.size(-1) / head_size;
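
The old expression `query.numel() / query.size(-1)` equals the token count only when `query` is a flat 2D tensor of shape `[num_tokens, num_heads * head_size]`; if `query` arrives as a 3D tensor `[num_tokens, num_heads, head_size]`, it yields `num_tokens * num_heads` instead. `positions` always holds exactly one entry per token, so `positions.numel()` is correct for either layout. Below is a minimal libtorch sketch of the arithmetic; the shapes (8 tokens, 4 heads, head size 64) are hypothetical and chosen only to illustrate the over-count:

#include <torch/torch.h>
#include <iostream>

int main() {
  // Hypothetical example: 8 tokens, 4 heads, head_size 64.
  auto positions = torch::zeros({8}, torch::kLong);
  auto query_2d = torch::zeros({8, 4 * 64});  // [num_tokens, num_heads * head_size]
  auto query_3d = torch::zeros({8, 4, 64});   // [num_tokens, num_heads, head_size]

  // Old formula: correct only for the flat 2D layout.
  std::cout << query_2d.numel() / query_2d.size(-1) << "\n";  // 8
  std::cout << query_3d.numel() / query_3d.size(-1) << "\n";  // 32 -- over-counts by num_heads

  // Fixed formula: one position entry per token, independent of query layout.
  std::cout << positions.numel() << "\n";  // 8
  return 0;
}

With the fix, the CPU kernel derives `num_tokens` from `positions` rather than from the query layout, so both 2D and 3D query tensors are handled consistently.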