www.infradead.org Git - users/hch/misc.git/commitdiff
raid6: riscv: replace one load with a move to speed up the calculation
author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Fri, 18 Jul 2025 07:27:08 +0000 (15:27 +0800)
committer: Paul Walmsley <pjw@kernel.org>
Tue, 16 Sep 2025 22:43:27 +0000 (16:43 -0600)
Since wp$$ == wq$$, there is no need to load the same data twice. Replace
one of the two loads with a move instruction so that the program runs faster.

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Link: https://lore.kernel.org/r/20250718072711.3865118-3-zhangchunyan@iscas.ac.cn
Signed-off-by: Paul Walmsley <pjw@kernel.org>
lib/raid6/rvv.c

index b193ea176d5d33908beab20c7bdb41cd752701ac..89da5fc247aa942c880f79322412099e168fa877 100644 (file)
@@ -44,7 +44,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
@@ -117,7 +117,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
@@ -218,9 +218,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              "vle8.v   v4, (%[wp1])\n"
-                             "vle8.v   v5, (%[wp1])\n"
+                             "vmv.v.v  v5, v4\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
@@ -310,9 +310,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              "vle8.v   v4, (%[wp1])\n"
-                             "vle8.v   v5, (%[wp1])\n"
+                             "vmv.v.v  v5, v4\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
@@ -440,13 +440,13 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              "vle8.v   v4, (%[wp1])\n"
-                             "vle8.v   v5, (%[wp1])\n"
+                             "vmv.v.v  v5, v4\n"
                              "vle8.v   v8, (%[wp2])\n"
-                             "vle8.v   v9, (%[wp2])\n"
+                             "vmv.v.v  v9, v8\n"
                              "vle8.v   v12, (%[wp3])\n"
-                             "vle8.v   v13, (%[wp3])\n"
+                             "vmv.v.v  v13, v12\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
@@ -566,13 +566,13 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              "vle8.v   v4, (%[wp1])\n"
-                             "vle8.v   v5, (%[wp1])\n"
+                             "vmv.v.v  v5, v4\n"
                              "vle8.v   v8, (%[wp2])\n"
-                             "vle8.v   v9, (%[wp2])\n"
+                             "vmv.v.v  v9, v8\n"
                              "vle8.v   v12, (%[wp3])\n"
-                             "vle8.v   v13, (%[wp3])\n"
+                             "vmv.v.v  v13, v12\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
@@ -754,21 +754,21 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              "vle8.v   v4, (%[wp1])\n"
-                             "vle8.v   v5, (%[wp1])\n"
+                             "vmv.v.v  v5, v4\n"
                              "vle8.v   v8, (%[wp2])\n"
-                             "vle8.v   v9, (%[wp2])\n"
+                             "vmv.v.v  v9, v8\n"
                              "vle8.v   v12, (%[wp3])\n"
-                             "vle8.v   v13, (%[wp3])\n"
+                             "vmv.v.v  v13, v12\n"
                              "vle8.v   v16, (%[wp4])\n"
-                             "vle8.v   v17, (%[wp4])\n"
+                             "vmv.v.v  v17, v16\n"
                              "vle8.v   v20, (%[wp5])\n"
-                             "vle8.v   v21, (%[wp5])\n"
+                             "vmv.v.v  v21, v20\n"
                              "vle8.v   v24, (%[wp6])\n"
-                             "vle8.v   v25, (%[wp6])\n"
+                             "vmv.v.v  v25, v24\n"
                              "vle8.v   v28, (%[wp7])\n"
-                             "vle8.v   v29, (%[wp7])\n"
+                             "vmv.v.v  v29, v28\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
@@ -948,21 +948,21 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
                asm volatile (".option  push\n"
                              ".option  arch,+v\n"
                              "vle8.v   v0, (%[wp0])\n"
-                             "vle8.v   v1, (%[wp0])\n"
+                             "vmv.v.v  v1, v0\n"
                              "vle8.v   v4, (%[wp1])\n"
-                             "vle8.v   v5, (%[wp1])\n"
+                             "vmv.v.v  v5, v4\n"
                              "vle8.v   v8, (%[wp2])\n"
-                             "vle8.v   v9, (%[wp2])\n"
+                             "vmv.v.v  v9, v8\n"
                              "vle8.v   v12, (%[wp3])\n"
-                             "vle8.v   v13, (%[wp3])\n"
+                             "vmv.v.v  v13, v12\n"
                              "vle8.v   v16, (%[wp4])\n"
-                             "vle8.v   v17, (%[wp4])\n"
+                             "vmv.v.v  v17, v16\n"
                              "vle8.v   v20, (%[wp5])\n"
-                             "vle8.v   v21, (%[wp5])\n"
+                             "vmv.v.v  v21, v20\n"
                              "vle8.v   v24, (%[wp6])\n"
-                             "vle8.v   v25, (%[wp6])\n"
+                             "vmv.v.v  v25, v24\n"
                              "vle8.v   v28, (%[wp7])\n"
-                             "vle8.v   v29, (%[wp7])\n"
+                             "vmv.v.v  v29, v28\n"
                              ".option  pop\n"
                              : :
                              [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),