; Test driver: each invocation below compiles this file with llc for the
; aarch64-unknown triple and pipes the assembly to FileCheck. The first one
; enables +fuse-aes and +crypto explicitly via -mattr; the remaining ones select
; a CPU with -mcpu (generic additionally passes +crypto). All invocations use
; the default FileCheck prefix, so every configuration must satisfy the same
; expectations.
; NOTE(review): the -mcpu configurations presumably get AES fusion (and, where
; not passed explicitly, crypto) from the CPU definition -- confirm against the
; AArch64 target description.
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 1 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s |
| 2 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s |
| 3 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s |
| 4 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s |
| 5 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s |
| 6 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s |
| 7 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s |
| Evandro Menezes | 3840db5 | 2017-08-02 18:55:34 +0000 | [diff] [blame] | 8 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m2 | FileCheck %s |
| Evandro Menezes | 9f9daa1 | 2018-01-30 15:40:16 +0000 | [diff] [blame] | 9 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s |
| Evandro Menezes | b2c82447 | 2018-06-06 18:56:00 +0000 | [diff] [blame] | 10 | ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s |
| Evandro Menezes | ec330cc | 2017-02-21 22:16:06 +0000 | [diff] [blame] | 11 | |
; AArch64 crypto intrinsics called by the functions in this file.
; aese and aesd take two <16 x i8> operands (named %d and %k here); aesmc and
; aesimc take a single <16 x i8> operand. All four return <16 x i8>.
| 12 | declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k) |
| 13 | declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d) |
| 14 | declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d, <16 x i8> %k) |
| 15 | declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %d) |
| 16 | |
; @aesea: four-lane aese/aesmc sequence used to test aese+aesmc pairing.
;   %a0 - pointer to four consecutive <16 x i8> input vectors (indices 0..3)
;   %b0 - pointer to four consecutive <16 x i8> key vectors (indices 0..3)
;   %c0 - pointer to four consecutive <16 x i8> output slots (indices 0..3)
;   %d  - second operand of the final aese in every lane
;   %e  - value xor'ed into every lane's final aese result before the store
; The FileCheck directives at the end of the body expect eight aese
; instructions, each immediately followed by an aesmc whose source and
; destination are the register just written by that aese, and no aesmc after
; the eighth pair.
| 17 | define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) { |
; Load the four input vectors from %a0[0..3].
| 18 | %d0 = load <16 x i8>, <16 x i8>* %a0 |
| 19 | %a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1 |
| 20 | %d1 = load <16 x i8>, <16 x i8>* %a1 |
| 21 | %a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2 |
| 22 | %d2 = load <16 x i8>, <16 x i8>* %a2 |
| 23 | %a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3 |
| 24 | %d3 = load <16 x i8>, <16 x i8>* %a3 |
; First group: key %k0 = %b0[0]; each aesmc consumes the aese result computed
; on the line directly above it.
| 25 | %k0 = load <16 x i8>, <16 x i8>* %b0 |
| 26 | %e00 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d0, <16 x i8> %k0) |
| 27 | %f00 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00) |
| 28 | %e01 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d1, <16 x i8> %k0) |
| 29 | %f01 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01) |
| 30 | %e02 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d2, <16 x i8> %k0) |
| 31 | %f02 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02) |
| 32 | %e03 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d3, <16 x i8> %k0) |
| 33 | %f03 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03) |
; Second group: key %k1 = %b0[1].
; NOTE(review): from here on each aesmc takes the aese result of the PREVIOUS
; group (%e0N here, %e1N and %e2N in the next groups), not the aese on the line
; above it. As a consequence %e30..%e33 have no users. This presumably leaves
; two live aese->aesmc pairs per lane (eight in total), which is the count the
; FileCheck directives expect -- confirm this is intentional before "fixing"
; the operands.
| 34 | %b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1 |
| 35 | %k1 = load <16 x i8>, <16 x i8>* %b1 |
| 36 | %e10 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f00, <16 x i8> %k1) |
| 37 | %f10 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00) |
| 38 | %e11 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f01, <16 x i8> %k1) |
| 39 | %f11 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01) |
| 40 | %e12 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f02, <16 x i8> %k1) |
| 41 | %f12 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02) |
| 42 | %e13 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f03, <16 x i8> %k1) |
| 43 | %f13 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03) |
; Third group: key %k2 = %b0[2].
| 44 | %b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2 |
| 45 | %k2 = load <16 x i8>, <16 x i8>* %b2 |
| 46 | %e20 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f10, <16 x i8> %k2) |
| 47 | %f20 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e10) |
| 48 | %e21 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f11, <16 x i8> %k2) |
| 49 | %f21 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e11) |
| 50 | %e22 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f12, <16 x i8> %k2) |
| 51 | %f22 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e12) |
| 52 | %e23 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f13, <16 x i8> %k2) |
| 53 | %f23 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e13) |
; Fourth group: key %k3 = %b0[3].
| 54 | %b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3 |
| 55 | %k3 = load <16 x i8>, <16 x i8>* %b3 |
| 56 | %e30 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f20, <16 x i8> %k3) |
| 57 | %f30 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e20) |
| 58 | %e31 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f21, <16 x i8> %k3) |
| 59 | %f31 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e21) |
| 60 | %e32 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f22, <16 x i8> %k3) |
| 61 | %f32 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e22) |
| 62 | %e33 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f23, <16 x i8> %k3) |
| 63 | %f33 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e23) |
; Final step per lane: aese with %d (no aesmc follows), then xor with %e.
| 64 | %g0 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f30, <16 x i8> %d) |
| 65 | %h0 = xor <16 x i8> %g0, %e |
| 66 | %g1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f31, <16 x i8> %d) |
| 67 | %h1 = xor <16 x i8> %g1, %e |
| 68 | %g2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f32, <16 x i8> %d) |
| 69 | %h2 = xor <16 x i8> %g2, %e |
| 70 | %g3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f33, <16 x i8> %d) |
| 71 | %h3 = xor <16 x i8> %g3, %e |
; Store the four results to %c0[0..3].
| 72 | store <16 x i8> %h0, <16 x i8>* %c0 |
| 73 | %c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1 |
| 74 | store <16 x i8> %h1, <16 x i8>* %c1 |
| 75 | %c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2 |
| 76 | store <16 x i8> %h2, <16 x i8>* %c2 |
| 77 | %c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3 |
| 78 | store <16 x i8> %h3, <16 x i8>* %c3 |
| 79 | ret void |
| 80 | |
; Expected assembly: eight aese instructions on registers v0-v7, each directly
; followed by an aesmc that reads and writes the captured register, then no
; further aesmc.
| 81 | ; CHECK-LABEL: aesea: |
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 82 | ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} |
| 83 | ; CHECK-NEXT: aesmc [[VA]], [[VA]] |
| 84 | ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} |
| 85 | ; CHECK-NEXT: aesmc [[VB]], [[VB]] |
| 86 | ; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} |
| 87 | ; CHECK-NEXT: aesmc [[VC]], [[VC]] |
| 88 | ; CHECK: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} |
| 89 | ; CHECK-NEXT: aesmc [[VD]], [[VD]] |
| 90 | ; CHECK: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} |
| 91 | ; CHECK-NEXT: aesmc [[VE]], [[VE]] |
| 92 | ; CHECK: aese [[VF:v[0-7].16b]], {{v[0-7].16b}} |
| 93 | ; CHECK-NEXT: aesmc [[VF]], [[VF]] |
| 94 | ; CHECK: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} |
| 95 | ; CHECK-NEXT: aesmc [[VG]], [[VG]] |
| 96 | ; CHECK: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} |
| 97 | ; CHECK-NEXT: aesmc [[VH]], [[VH]] |
| 98 | ; CHECK-NOT: aesmc |
| Evandro Menezes | ec330cc | 2017-02-21 22:16:06 +0000 | [diff] [blame] | 99 | } |
| 100 | |
; @aesda: four-lane aesd/aesimc sequence used to test aesd+aesimc pairing.
; Structurally identical to @aesea, with aesd in place of aese and aesimc in
; place of aesmc.
;   %a0 - pointer to four consecutive <16 x i8> input vectors (indices 0..3)
;   %b0 - pointer to four consecutive <16 x i8> key vectors (indices 0..3)
;   %c0 - pointer to four consecutive <16 x i8> output slots (indices 0..3)
;   %d  - second operand of the final aesd in every lane
;   %e  - value xor'ed into every lane's final aesd result before the store
; The FileCheck directives at the end of the body expect eight aesd
; instructions, each immediately followed by an aesimc whose source and
; destination are the register just written by that aesd, and no aesimc after
; the eighth pair.
| 101 | define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) { |
; Load the four input vectors from %a0[0..3].
| 102 | %d0 = load <16 x i8>, <16 x i8>* %a0 |
| 103 | %a1 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 1 |
| 104 | %d1 = load <16 x i8>, <16 x i8>* %a1 |
| 105 | %a2 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 2 |
| 106 | %d2 = load <16 x i8>, <16 x i8>* %a2 |
| 107 | %a3 = getelementptr inbounds <16 x i8>, <16 x i8>* %a0, i64 3 |
| 108 | %d3 = load <16 x i8>, <16 x i8>* %a3 |
; First group: key %k0 = %b0[0]; each aesimc consumes the aesd result computed
; on the line directly above it.
| 109 | %k0 = load <16 x i8>, <16 x i8>* %b0 |
| 110 | %e00 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d0, <16 x i8> %k0) |
| 111 | %f00 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00) |
| 112 | %e01 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d1, <16 x i8> %k0) |
| 113 | %f01 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01) |
| 114 | %e02 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d2, <16 x i8> %k0) |
| 115 | %f02 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02) |
| 116 | %e03 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d3, <16 x i8> %k0) |
| 117 | %f03 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03) |
; Second group: key %k1 = %b0[1].
; NOTE(review): from here on each aesimc takes the aesd result of the PREVIOUS
; group (%e0N here, %e1N and %e2N in the next groups), not the aesd on the line
; above it. As a consequence %e30..%e33 have no users. This presumably leaves
; two live aesd->aesimc pairs per lane (eight in total), which is the count the
; FileCheck directives expect -- confirm this is intentional before "fixing"
; the operands.
| 118 | %b1 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 1 |
| 119 | %k1 = load <16 x i8>, <16 x i8>* %b1 |
| 120 | %e10 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f00, <16 x i8> %k1) |
| 121 | %f10 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00) |
| 122 | %e11 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f01, <16 x i8> %k1) |
| 123 | %f11 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01) |
| 124 | %e12 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f02, <16 x i8> %k1) |
| 125 | %f12 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02) |
| 126 | %e13 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f03, <16 x i8> %k1) |
| 127 | %f13 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03) |
; Third group: key %k2 = %b0[2].
| 128 | %b2 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 2 |
| 129 | %k2 = load <16 x i8>, <16 x i8>* %b2 |
| 130 | %e20 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f10, <16 x i8> %k2) |
| 131 | %f20 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e10) |
| 132 | %e21 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f11, <16 x i8> %k2) |
| 133 | %f21 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e11) |
| 134 | %e22 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f12, <16 x i8> %k2) |
| 135 | %f22 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e12) |
| 136 | %e23 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f13, <16 x i8> %k2) |
| 137 | %f23 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e13) |
; Fourth group: key %k3 = %b0[3].
| 138 | %b3 = getelementptr inbounds <16 x i8>, <16 x i8>* %b0, i64 3 |
| 139 | %k3 = load <16 x i8>, <16 x i8>* %b3 |
| 140 | %e30 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f20, <16 x i8> %k3) |
| 141 | %f30 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e20) |
| 142 | %e31 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f21, <16 x i8> %k3) |
| 143 | %f31 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e21) |
| 144 | %e32 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f22, <16 x i8> %k3) |
| 145 | %f32 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e22) |
| 146 | %e33 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f23, <16 x i8> %k3) |
| 147 | %f33 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e23) |
; Final step per lane: aesd with %d (no aesimc follows), then xor with %e.
| 148 | %g0 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f30, <16 x i8> %d) |
| 149 | %h0 = xor <16 x i8> %g0, %e |
| 150 | %g1 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f31, <16 x i8> %d) |
| 151 | %h1 = xor <16 x i8> %g1, %e |
| 152 | %g2 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f32, <16 x i8> %d) |
| 153 | %h2 = xor <16 x i8> %g2, %e |
| 154 | %g3 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f33, <16 x i8> %d) |
| 155 | %h3 = xor <16 x i8> %g3, %e |
; Store the four results to %c0[0..3].
| 156 | store <16 x i8> %h0, <16 x i8>* %c0 |
| 157 | %c1 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 1 |
| 158 | store <16 x i8> %h1, <16 x i8>* %c1 |
| 159 | %c2 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 2 |
| 160 | store <16 x i8> %h2, <16 x i8>* %c2 |
| 161 | %c3 = getelementptr inbounds <16 x i8>, <16 x i8>* %c0, i64 3 |
| 162 | store <16 x i8> %h3, <16 x i8>* %c3 |
| 163 | ret void |
| 164 | |
; Expected assembly: eight aesd instructions on registers v0-v7, each directly
; followed by an aesimc that reads and writes the captured register, then no
; further aesimc.
| 165 | ; CHECK-LABEL: aesda: |
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 166 | ; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} |
| 167 | ; CHECK-NEXT: aesimc [[VA]], [[VA]] |
| 168 | ; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} |
| 169 | ; CHECK-NEXT: aesimc [[VB]], [[VB]] |
| 170 | ; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} |
| 171 | ; CHECK-NEXT: aesimc [[VC]], [[VC]] |
| 172 | ; CHECK: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} |
| 173 | ; CHECK-NEXT: aesimc [[VD]], [[VD]] |
| 174 | ; CHECK: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} |
| 175 | ; CHECK-NEXT: aesimc [[VE]], [[VE]] |
| 176 | ; CHECK: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} |
| 177 | ; CHECK-NEXT: aesimc [[VF]], [[VF]] |
| 178 | ; CHECK: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} |
| 179 | ; CHECK-NEXT: aesimc [[VG]], [[VG]] |
| 180 | ; CHECK: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} |
| 181 | ; CHECK-NEXT: aesimc [[VH]], [[VH]] |
| 182 | ; CHECK-NOT: aesimc |
| Florian Hahn | abb4218 | 2017-05-23 09:33:34 +0000 | [diff] [blame] | 183 | } |
| 184 | |
; @aes_load_store: aese/aesmc calls interleaved with loads and stores.
;   %p1, %p2, %p3 - pointers to the <16 x i8> values loaded as %in1, %in2, %in3
; In the IR, a load or a store sits between each aese call and the aesmc call
; that consumes it. The FileCheck directives at the end of the body still
; expect two aese instructions, each immediately followed by an aesmc on the
; same register, and no further aesmc (the third aese, %aese3, has no aesmc).
; NOTE(review): presumably the point is that the scheduler keeps each pair
; adjacent despite the intervening memory operations -- confirm against the
; commit that introduced this function.
| 185 | define void @aes_load_store(<16 x i8> *%p1, <16 x i8> *%p2 , <16 x i8> *%p3) { |
| 186 | entry: |
; Five stack slots are allocated; only %x1, %x3 and %x5 are stored to below.
| 187 | %x1 = alloca <16 x i8>, align 16 |
| 188 | %x2 = alloca <16 x i8>, align 16 |
| 189 | %x3 = alloca <16 x i8>, align 16 |
| 190 | %x4 = alloca <16 x i8>, align 16 |
| 191 | %x5 = alloca <16 x i8>, align 16 |
| 192 | %in1 = load <16 x i8>, <16 x i8>* %p1, align 16 |
| 193 | store <16 x i8> %in1, <16 x i8>* %x1, align 16 |
; NOTE(review): the calls reference attribute group #2, which is not defined in
; the visible part of the file -- confirm it exists or is tolerated.
| 194 | %aese1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in1) #2 |
; A load separates %aese1 from the aesmc that consumes it.
| Florian Hahn | abb4218 | 2017-05-23 09:33:34 +0000 | [diff] [blame] | 195 | %in2 = load <16 x i8>, <16 x i8>* %p2, align 16 |
| 196 | %aesmc1= call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese1) #2 |
| Florian Hahn | abb4218 | 2017-05-23 09:33:34 +0000 | [diff] [blame] | 197 | %aese2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in2) #2 |
; A store and a load separate %aese2 from the aesmc that consumes it.
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 198 | store <16 x i8> %aesmc1, <16 x i8>* %x3, align 16 |
| 199 | %in3 = load <16 x i8>, <16 x i8>* %p3, align 16 |
| Florian Hahn | abb4218 | 2017-05-23 09:33:34 +0000 | [diff] [blame] | 200 | %aesmc2= call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese2) #2 |
; Third aese: its result is stored directly, with no aesmc applied.
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 201 | %aese3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc2, <16 x i8> %in3) #2 |
| 202 | store <16 x i8> %aese3, <16 x i8>* %x5, align 16 |
| Florian Hahn | abb4218 | 2017-05-23 09:33:34 +0000 | [diff] [blame] | 203 | ret void |
| 204 | |
; Expected assembly: two aese instructions, each directly followed by an aesmc
; on the captured register, then no further aesmc.
| 205 | ; CHECK-LABEL: aes_load_store: |
| 206 | ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} |
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 207 | ; CHECK-NEXT: aesmc [[VA]], [[VA]] |
| Florian Hahn | abb4218 | 2017-05-23 09:33:34 +0000 | [diff] [blame] | 208 | ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} |
| Florian Hahn | f63a5e9 | 2017-07-29 20:35:28 +0000 | [diff] [blame] | 209 | ; CHECK-NEXT: aesmc [[VB]], [[VB]] |
| Florian Hahn | 0a26d2c | 2017-06-15 09:31:23 +0000 | [diff] [blame] | 210 | ; CHECK-NOT: aesmc |
| Evandro Menezes | ec330cc | 2017-02-21 22:16:06 +0000 | [diff] [blame] | 211 | } |