// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s

// Don't include mm_malloc.h; it is system-specific.
| 4 | #define __MM_MALLOC_H |
| 5 | |
| 6 | #include <immintrin.h> |
| 7 | |
//
// Test LLVM IR codegen of AVX loads, extracts, blends and inserts, plus the
// SSE4.2 string-compare intrinsics
//
| 11 | |
// Calls _mm256_loadu_ps on an untyped pointer; the emitted IR is expected to
// contain an <8 x float> load with alignment 1. The label directive pins the
// match to this function's IR.
__m256 test__mm256_loadu_ps(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_ps(
  // CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1
  return _mm256_loadu_ps(p);
}
| 16 | |
// Calls _mm256_loadu_pd on an untyped pointer; the emitted IR is expected to
// contain a <4 x double> load with alignment 1. The label directive pins the
// match to this function's IR.
__m256d test__mm256_loadu_pd(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_pd(
  // CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1
  return _mm256_loadu_pd(p);
}
| 21 | |
// Calls _mm256_loadu_si256 on an untyped pointer; the emitted IR is expected
// to contain a <4 x i64> load with alignment 1. The label directive pins the
// match to this function's IR.
__m256i test__mm256_loadu_si256(void *p) {
  // CHECK-LABEL: @test__mm256_loadu_si256(
  // CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1
  return _mm256_loadu_si256(p);
}
Craig Topper | 427435f | 2012-08-06 07:07:06 +0000 | [diff] [blame] | 26 | |
// Calls _mm_cmpestrm with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestrm128 intrinsic. The label directive pins the
// match to this function's IR.
__m128i test_mm_cmpestrm(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrm(
  // CHECK: @llvm.x86.sse42.pcmpestrm128
  return _mm_cmpestrm(A, LA, B, LB, 7);
}
| 31 | |
// Calls _mm_cmpestri with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestri128 intrinsic. The label ends in '(' so it
// cannot match the longer test_mm_cmpestri* function names that share this
// prefix.
int test_mm_cmpestri(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestri(
  // CHECK: @llvm.x86.sse42.pcmpestri128
  return _mm_cmpestri(A, LA, B, LB, 7);
}
| 36 | |
// Calls _mm_cmpestra with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestria128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpestra(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestra(
  // CHECK: @llvm.x86.sse42.pcmpestria128
  return _mm_cmpestra(A, LA, B, LB, 7);
}
| 41 | |
// Calls _mm_cmpestrc with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestric128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpestrc(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrc(
  // CHECK: @llvm.x86.sse42.pcmpestric128
  return _mm_cmpestrc(A, LA, B, LB, 7);
}
| 46 | |
// Calls _mm_cmpestro with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestrio128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpestro(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestro(
  // CHECK: @llvm.x86.sse42.pcmpestrio128
  return _mm_cmpestro(A, LA, B, LB, 7);
}
| 51 | |
// Calls _mm_cmpestrs with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestris128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpestrs(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrs(
  // CHECK: @llvm.x86.sse42.pcmpestris128
  return _mm_cmpestrs(A, LA, B, LB, 7);
}
| 56 | |
// Calls _mm_cmpestrz with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpestriz128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpestrz(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrz(
  // CHECK: @llvm.x86.sse42.pcmpestriz128
  return _mm_cmpestrz(A, LA, B, LB, 7);
}
| 61 | |
// Calls _mm_cmpistrm with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistrm128 intrinsic. The label directive pins the
// match to this function's IR.
__m128i test_mm_cmpistrm(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrm(
  // CHECK: @llvm.x86.sse42.pcmpistrm128
  return _mm_cmpistrm(A, B, 7);
}
| 66 | |
// Calls _mm_cmpistri with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistri128 intrinsic. The label ends in '(' so it
// cannot match the longer test_mm_cmpistri* function names that share this
// prefix.
int test_mm_cmpistri(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistri(
  // CHECK: @llvm.x86.sse42.pcmpistri128
  return _mm_cmpistri(A, B, 7);
}
| 71 | |
// Calls _mm_cmpistra with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistria128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpistra(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistra(
  // CHECK: @llvm.x86.sse42.pcmpistria128
  return _mm_cmpistra(A, B, 7);
}
| 76 | |
// Calls _mm_cmpistrc with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistric128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpistrc(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrc(
  // CHECK: @llvm.x86.sse42.pcmpistric128
  return _mm_cmpistrc(A, B, 7);
}
| 81 | |
// Calls _mm_cmpistro with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistrio128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpistro(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistro(
  // CHECK: @llvm.x86.sse42.pcmpistrio128
  return _mm_cmpistro(A, B, 7);
}
| 86 | |
// Calls _mm_cmpistrs with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistris128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpistrs(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrs(
  // CHECK: @llvm.x86.sse42.pcmpistris128
  return _mm_cmpistrs(A, B, 7);
}
| 91 | |
// Calls _mm_cmpistrz with immediate 7; the emitted IR is expected to reference
// the llvm.x86.sse42.pcmpistriz128 intrinsic. The label directive pins the
// match to this function's IR.
int test_mm_cmpistrz(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrz(
  // CHECK: @llvm.x86.sse42.pcmpistriz128
  return _mm_cmpistrz(A, B, 7);
}
Manman Ren | 8484375 | 2013-10-23 20:33:14 +0000 | [diff] [blame] | 96 | |
// Extracts a 32-bit lane with _mm256_extract_epi32 using index 8; the
// expected IR is an extractelement from <8 x i32> at position 0.
// NOTE(review): the index equals the lane count, which presumably exercises
// index wrap-around - confirm against the header implementation.
int test_extract_epi32(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi32
  // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
  const int lane = _mm256_extract_epi32(__a, 8);
  return lane;
}
| 102 | |
// Extracts a 16-bit lane with _mm256_extract_epi16 using index 16; the
// expected IR is an extractelement from <16 x i16> at position 0.
// NOTE(review): the index equals the lane count, which presumably exercises
// index wrap-around - confirm against the header implementation.
int test_extract_epi16(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi16
  // CHECK: extractelement <16 x i16> %{{.*}}, i32 0
  const int lane = _mm256_extract_epi16(__a, 16);
  return lane;
}
| 108 | |
// Extracts an 8-bit lane with _mm256_extract_epi8 using index 32; the
// expected IR is an extractelement from <32 x i8> at position 0.
// NOTE(review): the index equals the lane count, which presumably exercises
// index wrap-around - confirm against the header implementation.
int test_extract_epi8(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi8
  // CHECK: extractelement <32 x i8> %{{.*}}, i32 0
  const int lane = _mm256_extract_epi8(__a, 32);
  return lane;
}
Stephen Hines | 6bcf27b | 2014-05-29 04:14:42 -0700 | [diff] [blame] | 114 | |
// Blends two <4 x double> vectors with _mm256_blend_pd and immediate 0x35;
// the expected IR is a shufflevector with index vector <4, 1, 6, 3>.
__m256d test_256_blend_pd(__m256d __a, __m256d __b) {
  // CHECK-LABEL: @test_256_blend_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  const __m256d blended = _mm256_blend_pd(__a, __b, 0x35);
  return blended;
}
| 120 | |
// Blends two <8 x float> vectors with _mm256_blend_ps and immediate 0x35;
// the expected IR is a shufflevector with index vector
// <8, 1, 10, 3, 12, 13, 6, 7>.
__m256 test_256_blend_ps(__m256 __a, __m256 __b) {
  // CHECK-LABEL: @test_256_blend_ps
  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  const __m256 blended = _mm256_blend_ps(__a, __b, 0x35);
  return blended;
}
Stephen Hines | 0e2c34f | 2015-03-23 12:09:02 -0700 | [diff] [blame] | 126 | |
// Inserts the value 42 at index 3 with _mm256_insert_epi8; the expected IR is
// an insertelement on <32 x i8> (value and index are not pinned).
__m256i test_256_insert_epi8(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi8
  // CHECK: insertelement <32 x i8> {{.*}}, i8 {{.*}}, i32 {{.*}}
  const __m256i updated = _mm256_insert_epi8(__a, 42, 3);
  return updated;
}
| 132 | |
// Inserts the value 42 at index 3 with _mm256_insert_epi16; the expected IR
// is an insertelement on <16 x i16> (value and index are not pinned).
__m256i test_256_insert_epi16(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi16
  // CHECK: insertelement <16 x i16> {{.*}}, i16 {{.*}}, i32 {{.*}}
  const __m256i updated = _mm256_insert_epi16(__a, 42, 3);
  return updated;
}
| 138 | |
// Inserts the value 42 at index 3 with _mm256_insert_epi32; the expected IR
// is an insertelement on <8 x i32> (value and index are not pinned).
__m256i test_256_insert_epi32(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi32
  // CHECK: insertelement <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}}
  const __m256i updated = _mm256_insert_epi32(__a, 42, 3);
  return updated;
}
| 144 | |
// Inserts the value 42 at index 3 with _mm256_insert_epi64; the expected IR
// is an insertelement on <4 x i64> (value and index are not pinned).
__m256i test_256_insert_epi64(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi64
  // CHECK: insertelement <4 x i64> {{.*}}, i64 {{.*}}, i32 {{.*}}
  const __m256i updated = _mm256_insert_epi64(__a, 42, 3);
  return updated;
}