blob: 88e514dfb54befc008fdb8630a8b2d0a12fb5de1 [file] [log] [blame]
Chris Wilson545fe0f2016-07-11 12:39:01 +01001/*
2 * Copyright (c) 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
Chris Wilson545fe0f2016-07-11 12:39:01 +010028#include "config.h"
Chris Wilson545fe0f2016-07-11 12:39:01 +010029
30#ifdef HAVE_CPUID_H
31#include <cpuid.h>
32#else
33#define __get_cpuid_max(x, y) 0
34#define __cpuid(level, a, b, c, d) a = b = c = d = 0
35#define __cpuid_count(level, count, a, b, c, d) a = b = c = d = 0
36#endif
37
38#include "igt_x86.h"
Chris Wilson6a06d012018-02-27 21:45:14 +000039#include "igt_aux.h"
40
41#include <stdint.h>
Chris Wilson545fe0f2016-07-11 12:39:01 +010042#include <stdio.h>
Chris Wilson6a06d012018-02-27 21:45:14 +000043#include <string.h>
Chris Wilson545fe0f2016-07-11 12:39:01 +010044
Paul Kocialkowski76bce772017-07-20 17:11:52 +030045/**
46 * SECTION:igt_x86
47 * @short_description: x86 helper library
48 * @title: x86
49 * @include: igt_x86.h
50 */
51
/* Base leaf numbers of the basic and extended CPUID ranges. */
#define BASIC_CPUID 0x0
#define EXTENDED_CPUID 0x80000000

/*
 * Fallback definitions for the CPUID feature bits, used only when the
 * toolchain's <cpuid.h> has not already provided them.
 */

/* CPUID leaf 1, EDX */
#if !defined(bit_MMX)
#define bit_MMX (1 << 23)
#endif

#if !defined(bit_SSE)
#define bit_SSE (1 << 25)
#endif

#if !defined(bit_SSE2)
#define bit_SSE2 (1 << 26)
#endif

/* CPUID leaf 1, ECX */
#if !defined(bit_SSE3)
#define bit_SSE3 (1 << 0)
#endif

#if !defined(bit_SSSE3)
#define bit_SSSE3 (1 << 9)
#endif

#if !defined(bit_SSE4_1)
#define bit_SSE4_1 (1 << 19)
#endif

#if !defined(bit_SSE4_2)
#define bit_SSE4_2 (1 << 20)
#endif

#if !defined(bit_OSXSAVE)
#define bit_OSXSAVE (1 << 27)
#endif

#if !defined(bit_AVX)
#define bit_AVX (1 << 28)
#endif

/* CPUID leaf 7 (sub-leaf 0), EBX */
#if !defined(bit_AVX2)
#define bit_AVX2 (1 << 5)
#endif

/*
 * Execute XGETBV for extended control register @xcr, storing the low
 * half of the result (EAX) in @lo and the high half (EDX) in @hi.
 */
#define xgetbv(xcr, lo, hi) \
	__asm__ ("xgetbv" : "=a"(lo), "=d"(hi) : "c" (xcr))

/* Internal flag: the OS has enabled both XMM and YMM register state. */
#define has_YMM 0x1
99
Thierry Reding4fbb4b92017-10-12 10:33:37 +0200100#if defined(__x86_64__) || defined(__i386__)
Chris Wilson545fe0f2016-07-11 12:39:01 +0100101unsigned igt_x86_features(void)
102{
103 unsigned max = __get_cpuid_max(BASIC_CPUID, 0);
104 unsigned eax, ebx, ecx, edx;
105 unsigned features = 0;
106 unsigned extra = 0;
107
108 if (max >= 1) {
109 __cpuid(1, eax, ebx, ecx, edx);
110
111 if (ecx & bit_SSE3)
112 features |= SSE3;
113
114 if (ecx & bit_SSSE3)
115 features |= SSSE3;
116
117 if (ecx & bit_SSE4_1)
118 features |= SSE4_1;
119
120 if (ecx & bit_SSE4_2)
121 features |= SSE4_2;
122
123 if (ecx & bit_OSXSAVE) {
124 unsigned int bv_eax, bv_ecx;
125 xgetbv(0, bv_eax, bv_ecx);
126 if ((bv_eax & 6) == 6)
127 extra |= has_YMM;
128 }
129
130 if ((extra & has_YMM) && (ecx & bit_AVX))
131 features |= AVX;
132
133 if (edx & bit_MMX)
134 features |= MMX;
135
136 if (edx & bit_SSE)
137 features |= SSE;
138
139 if (edx & bit_SSE2)
140 features |= SSE2;
141 }
142
143 if (max >= 7) {
144 __cpuid_count(7, 0, eax, ebx, ecx, edx);
145
146 if ((extra & has_YMM) && (ebx & bit_AVX2))
147 features |= AVX2;
148 }
149
150 return features;
151}
152
153char *igt_x86_features_to_string(unsigned features, char *line)
154{
155 char *ret = line;
156
157#ifdef __x86_64__
158 line += sprintf(line, "x86-64");
159#else
160 line += sprintf(line, "x86");
161#endif
162
163 if (features & SSE2)
164 line += sprintf(line, ", sse2");
165 if (features & SSE3)
166 line += sprintf(line, ", sse3");
167 if (features & SSSE3)
168 line += sprintf(line, ", ssse3");
169 if (features & SSE4_1)
170 line += sprintf(line, ", sse4.1");
171 if (features & SSE4_2)
172 line += sprintf(line, ", sse4.2");
173 if (features & AVX)
174 line += sprintf(line, ", avx");
175 if (features & AVX2)
176 line += sprintf(line, ", avx2");
177
Petri Latvala031715e2019-01-16 13:20:49 +0200178 (void)line;
179
Chris Wilson545fe0f2016-07-11 12:39:01 +0100180 return ret;
181}
Thierry Reding4fbb4b92017-10-12 10:33:37 +0200182#endif
Chris Wilson6a06d012018-02-27 21:45:14 +0000183
184#if defined(__x86_64__) && !defined(__clang__)
#pragma GCC push_options
#pragma GCC target("sse4.1")

#include <smmintrin.h>

/*
 * Copy @len bytes from @src to @dst using SSE4.1 non-temporal loads
 * (_mm_stream_load_si128) for every read of the source.
 *
 * The streaming load requires a 16-byte aligned address, so the copy is
 * split into three phases:
 *   1. an unaligned head, fetched by loading the enclosing aligned
 *      16-byte line into a bounce buffer and copying out the wanted part;
 *   2. the aligned bulk, moved in 64-byte and then 16-byte steps;
 *   3. a tail shorter than 16 bytes, again via the bounce buffer.
 *
 * Phases 1 and 3 read the whole aligned 16-byte line containing the
 * requested bytes, i.e. up to 15 bytes outside [@src, @src + @len) but
 * never beyond that line.
 *
 * Byte-typed cursors are used instead of arithmetic on void pointers, so
 * no -Wpointer-arith suppression is required (a "diagnostic ignored"
 * pragma would not be undone by pop_options and would leak into the
 * remainder of the file).
 *
 * NOTE(review): the name suggests @src is expected to be a write-combining
 * mapping; nothing here depends on that for correctness.
 */
static void memcpy_from_wc_sse41(void *dst, const void *src, unsigned long len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	unsigned char bounce[16];
	unsigned long misalign = (uintptr_t)in & 15;

	/* Flush the internal buffer of potential stale gfx data */
	_mm_mfence();

	if (misalign) {
		__m128i *line = (__m128i *)((uintptr_t)in & ~(uintptr_t)15);
		unsigned long avail = 16 - misalign;
		unsigned long head = len < avail ? len : avail;

		_mm_storeu_si128((__m128i *)bounce,
				 _mm_stream_load_si128(line));

		memcpy(out, bounce + misalign, head);

		out += head;
		in += head;
		len -= head;
	}

	/* We assume we are doing bulk transfers, so prefer aligned moves */
	if (((uintptr_t)out & 15) == 0) {
		for (; len >= 64; in += 64, out += 64, len -= 64) {
			__m128i *S = (__m128i *)in;
			__m128i *D = (__m128i *)out;
			__m128i tmp[4];

			tmp[0] = _mm_stream_load_si128(S + 0);
			tmp[1] = _mm_stream_load_si128(S + 1);
			tmp[2] = _mm_stream_load_si128(S + 2);
			tmp[3] = _mm_stream_load_si128(S + 3);

			_mm_store_si128(D + 0, tmp[0]);
			_mm_store_si128(D + 1, tmp[1]);
			_mm_store_si128(D + 2, tmp[2]);
			_mm_store_si128(D + 3, tmp[3]);
		}
	} else {
		for (; len >= 64; in += 64, out += 64, len -= 64) {
			__m128i *S = (__m128i *)in;
			__m128i *D = (__m128i *)out;
			__m128i tmp[4];

			tmp[0] = _mm_stream_load_si128(S + 0);
			tmp[1] = _mm_stream_load_si128(S + 1);
			tmp[2] = _mm_stream_load_si128(S + 2);
			tmp[3] = _mm_stream_load_si128(S + 3);

			_mm_storeu_si128(D + 0, tmp[0]);
			_mm_storeu_si128(D + 1, tmp[1]);
			_mm_storeu_si128(D + 2, tmp[2]);
			_mm_storeu_si128(D + 3, tmp[3]);
		}
	}

	/* Remaining whole 16-byte lines. */
	for (; len >= 16; in += 16, out += 16, len -= 16) {
		_mm_storeu_si128((__m128i *)out,
				 _mm_stream_load_si128((__m128i *)in));
	}

	/*
	 * Tail: if any bytes remain, the source cursor is 16-byte aligned
	 * here, so load the full line and copy out only what was asked for.
	 */
	if (len) {
		_mm_storeu_si128((__m128i *)bounce,
				 _mm_stream_load_si128((__m128i *)in));
		memcpy(out, bounce, len);
	}
}

#pragma GCC pop_options
272
/*
 * Generic implementation of the copy: a plain memcpy() of @len bytes
 * from @src to @dst, with no special load instructions.
 */
static void memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	(void)memcpy(dst, src, len);
}
277
278static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long)
279{
280 if (igt_x86_features() & SSE4_1)
281 return memcpy_from_wc_sse41;
282
283 return memcpy_from_wc;
284}
285
/**
 * igt_memcpy_from_wc:
 * @dst: destination buffer
 * @src: source buffer
 * @len: number of bytes to copy
 *
 * Copies @len bytes from @src to @dst. The implementation is chosen by
 * resolve_memcpy_from_wc() through the GNU ifunc mechanism.
 */
void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len)
	__attribute__((ifunc("resolve_memcpy_from_wc")));
288
289#else
/**
 * igt_memcpy_from_wc:
 * @dst: destination buffer
 * @src: source buffer
 * @len: number of bytes to copy
 *
 * Fallback for builds without the SSE4.1 path (anything other than
 * x86-64, or when compiling with clang): a plain memcpy() of @len bytes
 * from @src to @dst.
 */
void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	(void)memcpy(dst, src, len);
}
294#endif