; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Masked AVX-512 variable-rotate intrinsics exercised by this file (prolv is
; rotate-left, prorv is rotate-right). As they are called below, the operands
; are (value, per-element rotate amount, passthrough, mask).
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Zero-masking vpermt2var shuffles; the constant-folding tests in this file use
; them to produce the rotate input from constant operands.
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; Tests showing replacement of variable rotates with immediate splat versions.

; Rotate-left of %x0 by a splat amount of 5 through the variable-rotate
; intrinsic, in three forms: merge-masked into %x1 under mask %x2, zero-masked
; under %x2, and with an all-ones mask. The three results are summed. Both
; -mcpu configurations expect every call to be emitted as the immediate form
; (vprold $5).
define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_rol_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprold $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprold $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprold $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprold $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-left of %x0 by a splat amount of 5 on 64-bit elements, in three forms:
; merge-masked into %x1 under mask %x2, zero-masked under %x2, and with an
; all-ones mask. The three results are summed. Both -mcpu configurations expect
; every call to be emitted as the immediate form (vprolq $5).
define <8 x i64> @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_rol_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolq $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolq $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolq $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolq $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Rotate-right of %x0 by a splat amount of 5 through the variable-rotate
; intrinsic, in three forms: merge-masked into %x1 under mask %x2, zero-masked
; under %x2, and with an all-ones mask. The three results are summed. Both
; -mcpu configurations expect every call to be emitted as the immediate form
; (vprord $5).
define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_ror_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprord $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprord $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprord $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprord $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-right of %x0 by a splat amount of 5 on 64-bit elements, in three
; forms: merge-masked into %x1 under mask %x2, zero-masked under %x2, and with
; an all-ones mask. The three results are summed. Both -mcpu configurations
; expect every call to be emitted as the immediate form (vprorq $5).
define <8 x i64> @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_ror_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorq $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorq $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorq $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorq $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.

; Rotate-left of 32-bit elements by splat amounts that exceed the element
; width: 33 (merge-masked into %x1 under %x2), -1 (zero-masked under %x2) and
; 65534 (all-ones mask). The expected immediates are 1, 31 and 30, i.e. each
; amount reduced modulo 32. The three results are summed.
define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprold $30, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprold $30, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-left of 64-bit elements by splat amounts that exceed the element
; width: 65534 (merge-masked into %x1 under %x2), 65 (zero-masked under %x2)
; and -1 (all-ones mask). The expected immediates are 62, 1 and 63, i.e. each
; amount reduced modulo 64. The three results are summed.
define <8 x i64> @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Rotate-right of 32-bit elements by splat amounts that exceed the element
; width: 33 (merge-masked into %x1 under %x2), -1 (zero-masked under %x2) and
; 65534 (all-ones mask). The expected immediates are 1, 31 and 30, i.e. each
; amount reduced modulo 32. The three results are summed.
define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprord $30, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprord $30, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Rotate-right of 64-bit elements by splat amounts that exceed the element
; width: 65534 (merge-masked into %x1 under %x2), 65 (zero-masked under %x2)
; and -1 (all-ones mask). The expected immediates are 62, 1 and 63, i.e. each
; amount reduced modulo 64. The three results are summed.
define <8 x i64> @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Constant folding
; We also test with a target shuffle so that the rotate can't be constant folded upon creation; it
; must wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.

; Rotate-left with all-constant operands: a splat of 1 rotated by the
; per-element amounts [0,1,2,63,65,65534,65535,-1] under an all-ones mask. The
; expected output is a single constant vector load, with no rotate instruction.
define <8 x i64> @test_fold_rol_v8i64() {
; CHECK-LABEL: test_fold_rol_v8i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

; Rotate-left whose input is produced by a zero-masking vpermt2var shuffle of
; constant operands, rotated by the per-element amounts 0..15 under an all-ones
; mask. The assertions currently expect a broadcast of 1 followed by a vprolvd
; with a constant-pool operand, i.e. the rotate is still emitted.
; %x0 and %x1 are not referenced by the body.
define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_fold_rol_v16i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprolvd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res1
}

; Rotate-right whose input is produced by a zero-masking vpermt2var shuffle of
; constant operands (lane 0 of the first shuffle operand is undef), rotated by
; the per-element amounts 0..7 under an all-ones mask. The assertions currently
; expect a broadcast of 1 followed by a vprorvq with a constant-pool operand,
; i.e. the rotate is still emitted.
define <8 x i64> @test_fold_ror_v8i64() {
; CHECK-LABEL: test_fold_ror_v8i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprorvq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res1
}

; Rotate-right whose input is produced by a zero-masking vpermt2var shuffle of
; constant operands, rotated by the per-element amounts 0..15 under an all-ones
; mask. The assertions currently expect a broadcast of 1 followed by a vprorvd
; with a constant-pool operand, i.e. the rotate is still emitted.
; %x0 and %x1 are not referenced by the body.
define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_fold_ror_v16i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vprorvd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res1
}