/* -*- linux-c -*- --------------------------------------------------------
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 *
 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, Inc., 59 Temple Place Ste 330,
 * Boston MA 02111-1307, USA; either version 2 of the License, or
 * (at your option) any later version; incorporated herein by reference.
 *
 * -----------------------------------------------------------------------
 */

/*
 * AVX512 implementation of RAID-6 syndrome functions
 *
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx512_constants {
	u64 x1d[8];
} raid6_avx512_constants __aligned(512) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

static int raid6_have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %1,%%zmm6"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm6"
				     :
				     : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm4,%1\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4"
			     :
			     : "m" (p[d]), "m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x1 = {
	raid6_avx5121_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_avx512,
	"avx512x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 AVX512 implementation
 */
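/*
 * Each iteration below handles two independent 64-byte ZMM chunks per data
 * disk (zmm2/zmm4 and zmm3/zmm6, with opmask registers k1/k2), trading
 * extra registers for more instruction-level parallelism.
 */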
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	/* We uniformly assume a single prefetch covers at least 64 bytes */
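	/*
	 * The loop strides 128 bytes, so each source block is prefetched as
	 * two explicit 64-byte halves.
	 */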
	for (d = 0; d < bytes; d += 128) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "vmovdqa64 %1,%%zmm3\n\t"	/* P[1] */
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %%zmm3,%%zmm6"	/* Q[1] */
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm4,%2\n\t"
			     "vmovntdq %%zmm6,%3"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
			       "m" (q[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x2 = {
	raid6_avx5122_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_avx512,
	"avx512x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX512 implementation
 */
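/*
 * The four-way unrolled variant uses zmm10-zmm15, which are only
 * addressable in 64-bit mode, hence the CONFIG_X86_64 guard above.
 */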
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"	/* Zero temp */
		     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"	/* P[0] */
		     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"	/* P[1] */
		     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"	/* Q[0] */
		     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"	/* Q[1] */
		     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
		     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
		     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
		     "vpxorq %%zmm14,%%zmm14,%%zmm14"	/* Q[3] */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 256) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "prefetchnta %2\n\t"
				     "prefetchnta %3\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vmovntdq %%zmm14,%7\n\t"
			     "vpxorq %%zmm14,%%zmm14,%%zmm14"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x4 = {
	raid6_avx5124_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_avx512,
	"avx512x4",
	1			/* Has cache hints */
};
#endif

#endif /* CONFIG_AS_AVX512 */