Matt Arsenault | a9720c6 | 2016-06-20 17:51:32 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s |
Matt Arsenault | 7aad8fd | 2017-01-24 22:02:15 +0000 | [diff] [blame] | 2 | ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s |
Tom Stellard | 3693080 | 2014-12-03 04:08:00 +0000 | [diff] [blame] | 3 | |
; Kernel that stores a constant and then runs an "s_endpgm" inline asm.
; Two s_endpgm matches are expected in the output: one for the asm string
; and, presumably, one emitted for the function's own return.
; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_kernel void @inline_asm(i32 addrspace(1)* %out) {
  store i32 5, i32 addrspace(1)* %out
  ; sideeffect keeps the otherwise result-less asm from being dropped.
  call void asm sideeffect "s_endpgm", ""()
  ret void
}
Nicolai Haehnle | a61e5a8 | 2016-01-06 22:01:04 +0000 | [diff] [blame] | 13 | |
; Same "s_endpgm" inline asm as the kernel case, but using the amdgpu_ps
; calling convention and no arguments. Two s_endpgm matches are expected.
; CHECK-LABEL: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_ps void @inline_asm_shader() {
  call void asm sideeffect "s_endpgm", ""()
  ret void
}
| 22 | |
Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 23 | |
; CHECK-LABEL: {{^}}branch_on_asm:
; Make sure inline assembly is treated as divergent.
; CHECK: s_mov_b32 s{{[0-9]+}}, 0
; CHECK: s_and_saveexec_b64
define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
  ; The branch condition is derived from the inline asm result; the
  ; s_and_saveexec_b64 check above expects exec-mask based control flow
  ; for this branch.
  %zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
  %cmp = icmp eq i32 %zero, 0
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}
Tom Stellard | 9f2e00d | 2016-03-09 16:02:52 +0000 | [diff] [blame] | 40 | |
; A v_cmp written as inline asm produces a 64-bit "=s" result from a "v"
; input. The checks follow the input copy into a VGPR, the compare into an
; SGPR pair, the two copies of that pair back into VGPRs, and the final
; dwordx2 store.
; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
  %mask = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
  store i64 %mask, i64 addrspace(1)* %out
  ret void
}
Matt Arsenault | a9720c6 | 2016-06-20 17:51:32 +0000 | [diff] [blame] | 52 | |
; One instruction in the asm string: the reported code length is expected
; to be 12 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm(i32 addrspace(1)* %out) {
  call void asm sideeffect "v_nop_e64", ""()
  ret void
}
| 60 | |
; All inlineasm instructions are assumed to be the maximum size, so the
; e32 form of the nop is expected to report the same 12 bytes as the e64
; form.
; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm_small_inst(i32 addrspace(1)* %out) {
  call void asm sideeffect "v_nop_e32", ""()
  ret void
}
| 69 | |
; Two newline-separated instructions in a single asm string: the reported
; code length is expected to be 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst(i32 addrspace(1)* %out) {
  call void asm sideeffect "
    v_nop_e64
    v_nop_e64
   ", ""()
  ret void
}
Matt Arsenault | accddac | 2016-07-01 23:26:50 +0000 | [diff] [blame] | 80 | |
; Two instructions separated by an empty line: the empty line must not be
; counted, so the expected length is still 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(i32 addrspace(1)* %out) {
  call void asm sideeffect "
    v_nop_e64

    v_nop_e64
   ", ""()
  ret void
}
| 92 | |
; Empty asm string: no instruction is contributed by the inline asm, and
; the expected code length is 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_0_inst(i32 addrspace(1)* %out) {
  call void asm sideeffect "", ""()
  ret void
}
| 100 | |
; Asm string containing only a comment: expected to report the same
; 4 bytes as an empty asm string.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment(i32 addrspace(1)* %out) {
  call void asm sideeffect "; comment", ""()
  ret void
}
| 108 | |
; A leading newline followed by a comment: still no instruction, so the
; expected code length is 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(i32 addrspace(1)* %out) {
  call void asm sideeffect "
    ; comment", ""()
  ret void
}
| 117 | |
; A comment followed by a trailing newline: still no instruction, so the
; expected code length is 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(i32 addrspace(1)* %out) {
  call void asm sideeffect "; comment
    ", ""()
  ret void
}
| 126 | |
; Two comment markers on one line, separated by spaces: nothing here is
; an instruction, so the expected code length is 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line(i32 addrspace(1)* %out) {
  call void asm sideeffect "; first comment ; second comment", ""()
  ret void
}
| 134 | |
; Two comment markers on one line with no space before the second one:
; the text after it must not be counted as an instruction, so the
; expected code length is 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(i32 addrspace(1)* %out) {
  call void asm sideeffect "; first comment;second comment", ""()
  ret void
}
| 142 | |
; Two instructions interleaved with leading, inline, standalone and
; trailing comments plus an empty line: only the two instructions should
; be counted, so the expected code length is 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(i32 addrspace(1)* %out) {
  call void asm sideeffect "; comment
    v_nop_e64 ; inline comment
    ; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
    ", ""()
  ret void
}
| 157 | |
; Like the mixed-comments case that opens with a comment, except the asm
; string starts directly with an instruction. The expected code length is
; 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(i32 addrspace(1)* %out) {
  call void asm sideeffect "v_nop_e64 ; inline comment
    ; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
    ", ""()
  ret void
}
| 171 | |
; Two instructions that carry operands, mixed with comments and an empty
; line: the operands must not change the count, so the expected code
; length is 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(i32 addrspace(1)* %out) {
  call void asm sideeffect "; comment
    v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
    ; separate comment
    v_bfrev_b32_e32 v0, 1

    ; trailing comment
    ; extra comment
    ", ""()
  ret void
}