[BugFix][ROCm] Fix GGUF MoE Dispatch Block_Dim for ROCm (#16247)
Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com>
This commit is contained in:
parent
5a1e1c8353
commit
9351f91be9
@ -129,7 +129,7 @@ static __device__ __forceinline__ void moe_q(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q4_0 64
|
||||
#define MOE_X_Q4_0 8
|
||||
#define MOE_Y_Q4_0 128
|
||||
#define NWARPS_Q4_0 8
|
||||
#else
|
||||
@ -190,7 +190,7 @@ static void ggml_moe_q4_0_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q4_1 64
|
||||
#define MOE_X_Q4_1 8
|
||||
#define MOE_Y_Q4_1 128
|
||||
#define NWARPS_Q4_1 8
|
||||
#else
|
||||
@ -251,7 +251,7 @@ static void ggml_moe_q4_1_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q5_0 64
|
||||
#define MOE_X_Q5_0 8
|
||||
#define MOE_Y_Q5_0 128
|
||||
#define NWARPS_Q5_0 8
|
||||
#else
|
||||
@ -312,7 +312,7 @@ static void ggml_moe_q5_0_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q5_1 64
|
||||
#define MOE_X_Q5_1 8
|
||||
#define MOE_Y_Q5_1 128
|
||||
#define NWARPS_Q5_1 8
|
||||
#else
|
||||
@ -373,7 +373,7 @@ static void ggml_moe_q5_1_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q8_0 64
|
||||
#define MOE_X_Q8_0 8
|
||||
#define MOE_Y_Q8_0 128
|
||||
#define NWARPS_Q8_0 8
|
||||
#else
|
||||
@ -434,7 +434,7 @@ static void ggml_moe_q8_0_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q2_K 64
|
||||
#define MOE_X_Q2_K 8
|
||||
#define MOE_Y_Q2_K 128
|
||||
#define NWARPS_Q2_K 8
|
||||
#else
|
||||
@ -495,7 +495,7 @@ static void ggml_moe_q2_K_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q3_K 64
|
||||
#define MOE_X_Q3_K 8
|
||||
#define MOE_Y_Q3_K 128
|
||||
#define NWARPS_Q3_K 8
|
||||
#else
|
||||
@ -556,7 +556,7 @@ static void ggml_moe_q3_K_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q4_K 64
|
||||
#define MOE_X_Q4_K 8
|
||||
#define MOE_Y_Q4_K 128
|
||||
#define NWARPS_Q4_K 8
|
||||
#else
|
||||
@ -617,7 +617,7 @@ static void ggml_moe_q4_K_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q5_K 64
|
||||
#define MOE_X_Q5_K 8
|
||||
#define MOE_Y_Q5_K 128
|
||||
#define NWARPS_Q5_K 8
|
||||
#else
|
||||
@ -678,7 +678,7 @@ static void ggml_moe_q5_K_q8_1_cuda(
|
||||
}
|
||||
|
||||
#if defined(USE_ROCM)
|
||||
#define MOE_X_Q6_K 64
|
||||
#define MOE_X_Q6_K 8
|
||||
#define MOE_Y_Q6_K 128
|
||||
#define NWARPS_Q6_K 8
|
||||
#else
|
||||
|
Loading…
x
Reference in New Issue
Block a user