; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

;
; Unary shuffle indices from registers
;
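; AVX lacks a fully variable cross-lane shuffle, so for these tests the
; expected lowering spills the source vector to an aligned stack slot and
; regathers each element with a scalar indexed load before rebuilding the
; result, as the CHECK lines below verify.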

define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v4f64_xxxx_i64:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x double> %x, i64 %i0
  %x1 = extractelement <4 x double> %x, i64 %i1
  %x2 = extractelement <4 x double> %x, i64 %i2
  %x3 = extractelement <4 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double %x0, i32 0
  %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
  %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
  %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
  ret <4 x double> %r3
}

define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v4f64_uxx0_i64:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x double> %x, i64 %i0
  %x1 = extractelement <4 x double> %x, i64 %i1
  %x2 = extractelement <4 x double> %x, i64 %i2
  %x3 = extractelement <4 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double undef, i32 0
  %r1 = insertelement <4 x double>   %r0, double   %x1, i32 1
  %r2 = insertelement <4 x double>   %r1, double   %x2, i32 2
  %r3 = insertelement <4 x double>   %r2, double   0.0, i32 3
  ret <4 x double> %r3
}

define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <2 x double> %x, i64 %i0
  %x1 = extractelement <2 x double> %x, i64 %i1
  %x2 = extractelement <2 x double> %x, i64 %i2
  %x3 = extractelement <2 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double %x0, i32 0
  %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
  %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
  %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
  ret <4 x double> %r3
}

define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}

define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64   0, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64   0, i32 3
  ret <4 x i64> %r3
}

define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %x0 = extractelement <2 x i64> %x, i64 %i0
  %x1 = extractelement <2 x i64> %x, i64 %i1
  %x2 = extractelement <2 x i64> %x, i64 %i2
  %x3 = extractelement <2 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}

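; For a v8f32 source, AVX2 can skip the stack round trip by moving each index
; into a vector register and routing the chosen element to lane 0 with
; vpermps, as the AVX2 checks below show; AVX1 still spills and regathers.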
define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; AVX1-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    movslq %edi, %rax
; AVX1-NEXT:    movslq %esi, %rsi
; AVX1-NEXT:    movslq %edx, %rdx
; AVX1-NEXT:    movslq %ecx, %r11
; AVX1-NEXT:    movslq %r8d, %r10
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    movslq %r9d, %r8
; AVX1-NEXT:    movslq 16(%rbp), %rdi
; AVX1-NEXT:    movslq 24(%rbp), %rcx
; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1],xmm0[0],xmm3[3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vmovd %esi, %xmm2
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm2
; AVX2-NEXT:    vmovd %edx, %xmm3
; AVX2-NEXT:    vpermps %ymm0, %ymm3, %ymm3
; AVX2-NEXT:    vmovd %ecx, %xmm4
; AVX2-NEXT:    vpermps %ymm0, %ymm4, %ymm4
; AVX2-NEXT:    vmovd %r8d, %xmm5
; AVX2-NEXT:    vpermps %ymm0, %ymm5, %ymm5
; AVX2-NEXT:    vmovd %r9d, %xmm6
; AVX2-NEXT:    vpermps %ymm0, %ymm6, %ymm6
; AVX2-NEXT:    vmovd {{.*#+}} xmm7 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpermps %ymm0, %ymm7, %ymm7
; AVX2-NEXT:    vmovd {{.*#+}} xmm8 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpermps %ymm0, %ymm8, %ymm0
; AVX2-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[2,3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm7[0],xmm5[3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm5[0,1,2],xmm0[0]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %x0 = extractelement <8 x float> %x, i32 %i0
  %x1 = extractelement <8 x float> %x, i32 %i1
  %x2 = extractelement <8 x float> %x, i32 %i2
  %x3 = extractelement <8 x float> %x, i32 %i3
  %x4 = extractelement <8 x float> %x, i32 %i4
  %x5 = extractelement <8 x float> %x, i32 %i5
  %x6 = extractelement <8 x float> %x, i32 %i6
  %x7 = extractelement <8 x float> %x, i32 %i7
  %r0 = insertelement <8 x float> undef, float %x0, i32 0
  %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
  %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
  %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
  %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
  %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
  %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
  %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
  ret <8 x float> %r7
}

define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; ALL-LABEL: var_shuffle_v8f32_v4f32_xxxxxxxx_i32:
; ALL:       # BB#0:
; ALL-NEXT:    movslq %edi, %rax
; ALL-NEXT:    movslq %esi, %rsi
; ALL-NEXT:    movslq %edx, %rdx
; ALL-NEXT:    movslq %ecx, %r11
; ALL-NEXT:    movslq %r8d, %r10
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    movslq %r9d, %r8
; ALL-NEXT:    movslq {{[0-9]+}}(%rsp), %rdi
; ALL-NEXT:    movslq {{[0-9]+}}(%rsp), %rcx
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; ALL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; ALL-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ALL-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1],xmm0[0],xmm3[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <4 x float> %x, i32 %i0
  %x1 = extractelement <4 x float> %x, i32 %i1
  %x2 = extractelement <4 x float> %x, i32 %i2
  %x3 = extractelement <4 x float> %x, i32 %i3
  %x4 = extractelement <4 x float> %x, i32 %i4
  %x5 = extractelement <4 x float> %x, i32 %i5
  %x6 = extractelement <4 x float> %x, i32 %i6
  %x7 = extractelement <4 x float> %x, i32 %i7
  %r0 = insertelement <8 x float> undef, float %x0, i32 0
  %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
  %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
  %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
  %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
  %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
  %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
  %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
  ret <8 x float> %r7
}

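; There is no variable-index word extract (pextrw only takes an immediate
; index), so for i16 elements both AVX targets gather the spilled words one
; at a time with movzwl/vpinsrw.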
define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; AVX1-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    movslq 32(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    movslq 40(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq 48(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq 56(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq 64(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq 72(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq 80(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq 88(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq %edi, %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    movslq %esi, %rax
; AVX1-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %edx, %rax
; AVX1-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %ecx, %rax
; AVX1-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %r8d, %rax
; AVX1-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %r9d, %rax
; AVX1-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq 16(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movslq 24(%rbp), %rax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    movslq 32(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    movslq 40(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq 48(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq 56(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq 64(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq 72(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq 80(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq 88(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq %edi, %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    movslq %esi, %rax
; AVX2-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %edx, %rax
; AVX2-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %ecx, %rax
; AVX2-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %r8d, %rax
; AVX2-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %r9d, %rax
; AVX2-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq 16(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movslq 24(%rbp), %rax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %x0  = extractelement <16 x i16> %x, i32 %i0
  %x1  = extractelement <16 x i16> %x, i32 %i1
  %x2  = extractelement <16 x i16> %x, i32 %i2
  %x3  = extractelement <16 x i16> %x, i32 %i3
  %x4  = extractelement <16 x i16> %x, i32 %i4
  %x5  = extractelement <16 x i16> %x, i32 %i5
  %x6  = extractelement <16 x i16> %x, i32 %i6
  %x7  = extractelement <16 x i16> %x, i32 %i7
  %x8  = extractelement <16 x i16> %x, i32 %i8
  %x9  = extractelement <16 x i16> %x, i32 %i9
  %x10 = extractelement <16 x i16> %x, i32 %i10
  %x11 = extractelement <16 x i16> %x, i32 %i11
  %x12 = extractelement <16 x i16> %x, i32 %i12
  %x13 = extractelement <16 x i16> %x, i32 %i13
  %x14 = extractelement <16 x i16> %x, i32 %i14
  %x15 = extractelement <16 x i16> %x, i32 %i15
  %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
  %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
  %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
  %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
  %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
  %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
  %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
  %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
  %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
  %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
  %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
  %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
  %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
  %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
  %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
  %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
  ret <16 x i16> %r15
}

define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; AVX1-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq %edi, %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    movslq %esi, %rax
; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %edx, %rax
; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %ecx, %rax
; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %r8d, %rax
; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %r9d, %rax
; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq %edi, %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    movslq %esi, %rax
; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %edx, %rax
; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %ecx, %rax
; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %r8d, %rax
; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %r9d, %rax
; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %x0  = extractelement <8 x i16> %x, i32 %i0
  %x1  = extractelement <8 x i16> %x, i32 %i1
  %x2  = extractelement <8 x i16> %x, i32 %i2
  %x3  = extractelement <8 x i16> %x, i32 %i3
  %x4  = extractelement <8 x i16> %x, i32 %i4
  %x5  = extractelement <8 x i16> %x, i32 %i5
  %x6  = extractelement <8 x i16> %x, i32 %i6
  %x7  = extractelement <8 x i16> %x, i32 %i7
  %x8  = extractelement <8 x i16> %x, i32 %i8
  %x9  = extractelement <8 x i16> %x, i32 %i9
  %x10 = extractelement <8 x i16> %x, i32 %i10
  %x11 = extractelement <8 x i16> %x, i32 %i11
  %x12 = extractelement <8 x i16> %x, i32 %i12
  %x13 = extractelement <8 x i16> %x, i32 %i13
  %x14 = extractelement <8 x i16> %x, i32 %i14
  %x15 = extractelement <8 x i16> %x, i32 %i15
  %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
  %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
  %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
  %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
  %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
  %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
  %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
  %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
  %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
  %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
  %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
  %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
  %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
  %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
  %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
  %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
  ret <16 x i16> %r15
}

;
; Unary shuffle indices from memory
;
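; Same gathers as above, but the shuffle indices are first loaded from a
; buffer; the checks expect the four scalar index loads (movq off %rdi)
; ahead of the per-element loads.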

define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
; AVX1-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    movq (%rdi), %rax
; AVX1-NEXT:    movq 8(%rdi), %rcx
; AVX1-NEXT:    movq 16(%rdi), %rdx
; AVX1-NEXT:    movq 24(%rdi), %rsi
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    movq (%rdi), %rax
; AVX2-NEXT:    movq 8(%rdi), %rcx
; AVX2-NEXT:    movq 16(%rdi), %rdx
; AVX2-NEXT:    movq 24(%rdi), %rsi
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %p0  = getelementptr inbounds i64, i64* %i, i32 0
  %p1  = getelementptr inbounds i64, i64* %i, i32 1
  %p2  = getelementptr inbounds i64, i64* %i, i32 2
  %p3  = getelementptr inbounds i64, i64* %i, i32 3
  %i0  = load i64, i64* %p0, align 4
  %i1  = load i64, i64* %p1, align 4
  %i2  = load i64, i64* %p2, align 4
  %i3  = load i64, i64* %p3, align 4
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}

define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
; AVX1-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    movq (%rdi), %rax
; AVX1-NEXT:    movq 8(%rdi), %rcx
; AVX1-NEXT:    movq 16(%rdi), %rdx
; AVX1-NEXT:    movq 24(%rdi), %rsi
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movq (%rdi), %rax
; AVX2-NEXT:    movq 8(%rdi), %rcx
; AVX2-NEXT:    movq 16(%rdi), %rdx
; AVX2-NEXT:    movq 24(%rdi), %rsi
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %p0  = getelementptr inbounds i64, i64* %i, i32 0
  %p1  = getelementptr inbounds i64, i64* %i, i32 1
  %p2  = getelementptr inbounds i64, i64* %i, i32 2
  %p3  = getelementptr inbounds i64, i64* %i, i32 3
  %i0  = load i64, i64* %p0, align 4
  %i1  = load i64, i64* %p1, align 4
  %i2  = load i64, i64* %p2, align 4
  %i3  = load i64, i64* %p3, align 4
  %x0 = extractelement <2 x i64> %x, i64 %i0
  %x1 = extractelement <2 x i64> %x, i64 %i1
  %x2 = extractelement <2 x i64> %x, i64 %i2
  %x3 = extractelement <2 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}