// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "x86.h"

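/*
 * The assembly below uses 512-bit vpbroadcastb/vpshufb/vpsraw (AVX512BW),
 * vbroadcasti64x2 (AVX512DQ) and AVX512F base instructions, so
 * raid6_has_avx512() requires those feature bits (along with AVX, AVX2
 * and AVX512VL) before this implementation is selected.
 */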
static int raid6_has_avx512(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) &&
                boot_cpu_has(X86_FEATURE_AVX) &&
                boot_cpu_has(X86_FEATURE_AVX512F) &&
                boot_cpu_has(X86_FEATURE_AVX512BW) &&
                boot_cpu_has(X86_FEATURE_AVX512VL) &&
                boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
                                     int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */

        dp = (u8 *)ptrs[faila];
        ptrs[faila] = raid6_get_zero_page();
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = raid6_get_zero_page();
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dp;
        ptrs[failb]   = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
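        /*
         * Per byte, with px = P ^ P' and qx = qmul[Q ^ Q'], the failed
         * blocks are rebuilt as DB = pbmul[px] ^ qx and DA = DB ^ px,
         * as in the generic C recovery in lib/raid6/recov.c.
         * raid6_vgfmul[c] holds two 16-byte tables: products of c with
         * the low-nibble values at offset 0 and with the high-nibble
         * values at offset 16, so a GF(256) multiply by c becomes two
         * vpshufb lookups XORed together.
         */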

        kernel_fpu_begin();

        /* zmm7 = x0f[64]: low-nibble mask used by the multiply steps below */
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm9\n\t"
                             "vmovdqa64 %2, %%zmm0\n\t"
                             "vmovdqa64 %3, %%zmm8\n\t"
                             "vpxorq %4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %5, %%zmm9, %%zmm9\n\t"
                             "vpxorq %6, %%zmm0, %%zmm0\n\t"
                             "vpxorq %7, %%zmm8, %%zmm8"
                             :
                             : "m" (q[0]), "m" (q[64]), "m" (p[0]),
                               "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
                               "m" (dp[0]), "m" (dp[64]));

                /*
                 * 1 = dq[0]  ^ q[0]
                 * 9 = dq[64] ^ q[64]
                 * 0 = dp[0]  ^ p[0]
                 * 8 = dp[64] ^ p[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

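                /*
                 * Split each byte into nibbles: vpsraw shifts words right
                 * by 4 and the vpandq with zmm7 masks off the bits pulled
                 * in from the neighbouring byte, then vpshufb indexes the
                 * per-nibble multiplication tables.
                 */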
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpsraw $4, %%zmm9, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /*
                 * 5 = qx[0]
                 * 15 = qx[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpsraw $4, %%zmm8, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm12, %%zmm13, %%zmm13"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                /*
                 * 1  = pbmul[px[0]]
                 * 13 = pbmul[px[64]]
                 */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm15, %%zmm13, %%zmm13"
                             :
                             : );

                /*
                 * 1 = db = DQ
                 * 13 = db[64] = DQ[64]
                 */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm13, %1\n\t"
                             "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vpxorq %%zmm13, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]));

                asm volatile("vmovdqa64 %%zmm0, %0\n\t"
                             "vmovdqa64 %%zmm8, %1"
                             :
                             : "m" (dp[0]), "m" (dp[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dp += 128;
                dq += 128;
#else
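                /*
                 * 32-bit build: only zmm0-zmm7 are architecturally
                 * available, so process a single 64-byte lane per
                 * iteration.
                 */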
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm0\n\t"
                             "vpxorq %2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %3, %%zmm0, %%zmm0"
                             :
                             : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));

                /* 1 = dq ^ q;  0 = dp ^ p */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                /*
                 * 1 = dq ^ q
                 * 3 = (dq ^ q) >> 4
                 */
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /* 5 = qx */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = pbmul[px] */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             /* 1 = db = DQ */
                             "vmovdqa64 %%zmm1, %0\n\t"
                             :
                             : "m" (dq[0]));

                asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vmovdqa64 %%zmm0, %0"
                             :
                             : "m" (dp[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
                                     void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */

        dq = (u8 *)ptrs[faila];
        ptrs[faila] = raid6_get_zero_page();
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dq;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
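        /*
         * Per byte, the lost block is DQ = qmul[Q ^ Q'] and P is then
         * repaired as P ^ DQ, matching the generic C version in
         * lib/raid6/recov.c.
         */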

        kernel_fpu_begin();

        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vmovdqa64 %1, %%zmm8\n\t"
                             "vpxorq %2, %%zmm3, %%zmm3\n\t"
                             "vpxorq %3, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
                               "m" (q[64]));

                /*
                 * 3 = q[0] ^ dq[0]
                 * 8 = q[64] ^ dq[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vmovapd %%zmm0, %%zmm13\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vmovapd %%zmm1, %%zmm14"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpsraw $4, %%zmm8, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm13, %%zmm14, %%zmm14"
                             :
                             : );

                /*
                 * 1  = qmul[q[0]  ^ dq[0]]
                 * 14 = qmul[q[64] ^ dq[64]]
                 */
                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vmovdqa64 %1, %%zmm12\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
                             "vpxorq %%zmm14, %%zmm12, %%zmm12"
                             :
                             : "m" (p[0]), "m" (p[64]));

                /*
                 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
                 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
                 */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm14, %1\n\t"
                             "vmovdqa64 %%zmm2, %2\n\t"
                             "vmovdqa64 %%zmm12, %3"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
                               "m" (p[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dq += 128;
#else
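                /* 32-bit build: one 64-byte lane per iteration */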
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vpxorq %1, %%zmm3, %%zmm3"
                             :
                             : "m" (dq[0]), "m" (q[0]));

                /* 3 = q ^ dq */

                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = qmul[q ^ dq] */

                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2"
                             :
                             : "m" (p[0]));

                /* 2 = p ^ qmul[q ^ dq] */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm2, %1"
                             :
                             : "m" (dq[0]), "m" (p[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}

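/*
 * Higher .priority than the SSSE3 and AVX2 recovery routines, so this
 * implementation is preferred whenever raid6_has_avx512() reports support.
 */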
const struct raid6_recov_calls raid6_recov_avx512 = {
        .data2 = raid6_2data_recov_avx512,
        .datap = raid6_datap_recov_avx512,
        .valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
        .name = "avx512x2",
#else
        .name = "avx512x1",
#endif
        .priority = 3,
};