Matt Arsenault | 6689abe | 2016-05-05 20:07:37 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC |
Matt Arsenault | 7aad8fd | 2017-01-24 22:02:15 +0000 | [diff] [blame] | 2 | ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC |
Matt Arsenault | 6689abe | 2016-05-05 20:07:37 +0000 | [diff] [blame] | 3 | ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC |
Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 4 | |
Tom Stellard | 79243d9 | 2014-10-01 17:15:17 +0000 | [diff] [blame] | 5 | ; FUNC-LABEL: {{^}}fadd_f32: |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 6 | ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W |
Tom Stellard | 326d6ec | 2014-11-05 14:50:53 +0000 | [diff] [blame] | 7 | ; SI: v_add_f32 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 8 | define amdgpu_kernel void @fadd_f32(float addrspace(1)* %out, float %a, float %b) #0 { |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 9 | %add = fadd float %a, %b |
| 10 | store float %add, float addrspace(1)* %out, align 4 |
Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 11 | ret void |
| 12 | } |
| 13 | |
Tom Stellard | 79243d9 | 2014-10-01 17:15:17 +0000 | [diff] [blame] | 14 | ; FUNC-LABEL: {{^}}fadd_v2f32: |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 15 | ; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z |
| 16 | ; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y |
Tom Stellard | 326d6ec | 2014-11-05 14:50:53 +0000 | [diff] [blame] | 17 | ; SI: v_add_f32 |
| 18 | ; SI: v_add_f32 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 19 | define amdgpu_kernel void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 20 | %add = fadd <2 x float> %a, %b |
| 21 | store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8 |
Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 22 | ret void |
| 23 | } |
| 24 | |
Tom Stellard | 79243d9 | 2014-10-01 17:15:17 +0000 | [diff] [blame] | 25 | ; FUNC-LABEL: {{^}}fadd_v4f32: |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 26 | ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} |
| 27 | ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} |
| 28 | ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} |
| 29 | ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} |
Tom Stellard | 326d6ec | 2014-11-05 14:50:53 +0000 | [diff] [blame] | 30 | ; SI: v_add_f32 |
| 31 | ; SI: v_add_f32 |
| 32 | ; SI: v_add_f32 |
| 33 | ; SI: v_add_f32 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 34 | define amdgpu_kernel void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { |
David Blaikie | 79e6c74 | 2015-02-27 19:29:02 +0000 | [diff] [blame] | 35 | %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 |
David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 36 | %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16 |
| 37 | %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16 |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 38 | %result = fadd <4 x float> %a, %b |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 39 | store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 40 | ret void |
| 41 | } |
Tom Stellard | 967bf58 | 2014-02-13 23:34:15 +0000 | [diff] [blame] | 42 | |
Tom Stellard | 79243d9 | 2014-10-01 17:15:17 +0000 | [diff] [blame] | 43 | ; FUNC-LABEL: {{^}}fadd_v8f32: |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 44 | ; R600: ADD |
| 45 | ; R600: ADD |
| 46 | ; R600: ADD |
| 47 | ; R600: ADD |
| 48 | ; R600: ADD |
| 49 | ; R600: ADD |
| 50 | ; R600: ADD |
| 51 | ; R600: ADD |
Tom Stellard | 326d6ec | 2014-11-05 14:50:53 +0000 | [diff] [blame] | 52 | ; SI: v_add_f32 |
| 53 | ; SI: v_add_f32 |
| 54 | ; SI: v_add_f32 |
| 55 | ; SI: v_add_f32 |
| 56 | ; SI: v_add_f32 |
| 57 | ; SI: v_add_f32 |
| 58 | ; SI: v_add_f32 |
| 59 | ; SI: v_add_f32 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 60 | define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 { |
Matt Arsenault | a6dc6c2 | 2014-08-06 20:27:55 +0000 | [diff] [blame] | 61 | %add = fadd <8 x float> %a, %b |
| 62 | store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32 |
Tom Stellard | 967bf58 | 2014-02-13 23:34:15 +0000 | [diff] [blame] | 63 | ret void |
| 64 | } |
Matt Arsenault | 9a3fd87 | 2017-03-09 01:36:39 +0000 | [diff] [blame] | 65 | |
| 66 | ; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32: |
| 67 | ; SI-NOT: v_add_f32 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 68 | define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 { |
Matt Arsenault | 9a3fd87 | 2017-03-09 01:36:39 +0000 | [diff] [blame] | 69 | %add = fadd float %a, 0.0 |
| 70 | store float %add, float addrspace(1)* %out, align 4 |
| 71 | ret void |
| 72 | } |
| 73 | |
| 74 | attributes #0 = { nounwind } |
| 75 | attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" } |