| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 1 | ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched | 
| NAKAMURA Takumi | e781913 | 2013-03-11 23:16:30 +0000 | [diff] [blame] | 2 | ;REQUIRES: asserts | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 3 |  | 
; Test body: a function with the amdgpu_vs calling convention taking two
; inreg <4 x float> arguments. Only %reg1 is read; %reg0 is not used anywhere
; in the body. Control flow is a loop (LOOP / ENDIF16 / ENDIF19) with two
; exits that both land in ENDIF, where sixteen load/extract/multiply chains
; and two llvm.r600.store.swizzle calls are issued before returning.
; NOTE(review): the RUN line runs llc with -stress-sched -verify-misched, so
; the instruction sequence itself is the test input; keep it unchanged.
| Nicolai Haehnle | df3a20c | 2016-04-06 19:40:20 +0000 | [diff] [blame] | 4 | define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) { | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 5 | main_body: | 
; Split %reg1 into its four scalar lanes (%0..%3).
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 6 | %0 = extractelement <4 x float> %reg1, i32 0 | 
|  | 7 | %1 = extractelement <4 x float> %reg1, i32 1 | 
|  | 8 | %2 = extractelement <4 x float> %reg1, i32 2 | 
|  | 9 | %3 = extractelement <4 x float> %reg1, i32 3 | 
; Enter LOOP when lane 0 is less than zero or the compare is unordered
; (fcmp ult); otherwise go straight to ENDIF. The select/fsub/fptosi/bitcast
; sequence turns the i1 into an i32 that is non-zero exactly when the
; compare is true (1.0 -> -1.0 -> -1, 0.0 -> -0.0 -> 0).
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 10 | %4 = fcmp ult float %0, 0.000000e+00 | 
|  | 11 | %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 | 
|  | 12 | %6 = fsub float -0.000000e+00, %5 | 
|  | 13 | %7 = fptosi float %6 to i32 | 
|  | 14 | %8 = bitcast i32 %7 to float | 
|  | 15 | %9 = bitcast float %8 to i32 | 
|  | 16 | %10 = icmp ne i32 %9, 0 | 
|  | 17 | br i1 %10, label %LOOP, label %ENDIF | 
|  | 18 |  | 
; Common exit block. The phis take the four temp values either from the
; initial constants (0, 1, 0, 0 when arriving from main_body) or from the
; current LOOP phis (when arriving from LOOP or ENDIF16).
|  | 19 | ENDIF:                                            ; preds = %ENDIF16, %LOOP, %main_body | 
|  | 20 | %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ] | 
|  | 21 | %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ] | 
|  | 22 | %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ] | 
|  | 23 | %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ] | 
; The vector at null and elements 1..3 of a [1024 x <4 x float>] array based
; at null, all in addrspace(9), are each loaded four times (one load per
; extracted lane). Output lane j accumulates
;   vec0.lane[j] * %0 + vec1.lane[j] * %1 + vec2.lane[j] * %2 + vec3.lane[j] * %3
; NOTE(review): addrspace(9) is presumably an R600 constant-buffer address
; space - confirm against the AMDGPU backend documentation.
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 24 | %11 = load <4 x float>, <4 x float> addrspace(9)* null | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 25 | %12 = extractelement <4 x float> %11, i32 0 | 
|  | 26 | %13 = fmul float %12, %0 | 
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 27 | %14 = load <4 x float>, <4 x float> addrspace(9)* null | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 28 | %15 = extractelement <4 x float> %14, i32 1 | 
|  | 29 | %16 = fmul float %15, %0 | 
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 30 | %17 = load <4 x float>, <4 x float> addrspace(9)* null | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 31 | %18 = extractelement <4 x float> %17, i32 2 | 
|  | 32 | %19 = fmul float %18, %0 | 
| David Blaikie | a79ac14 | 2015-02-27 21:17:42 +0000 | [diff] [blame] | 33 | %20 = load <4 x float>, <4 x float> addrspace(9)* null | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 34 | %21 = extractelement <4 x float> %20, i32 3 | 
|  | 35 | %22 = fmul float %21, %0 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 36 | %23 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 37 | %24 = extractelement <4 x float> %23, i32 0 | 
|  | 38 | %25 = fmul float %24, %1 | 
|  | 39 | %26 = fadd float %25, %13 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 40 | %27 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 41 | %28 = extractelement <4 x float> %27, i32 1 | 
|  | 42 | %29 = fmul float %28, %1 | 
|  | 43 | %30 = fadd float %29, %16 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 44 | %31 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 45 | %32 = extractelement <4 x float> %31, i32 2 | 
|  | 46 | %33 = fmul float %32, %1 | 
|  | 47 | %34 = fadd float %33, %19 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 48 | %35 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 49 | %36 = extractelement <4 x float> %35, i32 3 | 
|  | 50 | %37 = fmul float %36, %1 | 
|  | 51 | %38 = fadd float %37, %22 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 52 | %39 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 53 | %40 = extractelement <4 x float> %39, i32 0 | 
|  | 54 | %41 = fmul float %40, %2 | 
|  | 55 | %42 = fadd float %41, %26 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 56 | %43 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 57 | %44 = extractelement <4 x float> %43, i32 1 | 
|  | 58 | %45 = fmul float %44, %2 | 
|  | 59 | %46 = fadd float %45, %30 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 60 | %47 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 61 | %48 = extractelement <4 x float> %47, i32 2 | 
|  | 62 | %49 = fmul float %48, %2 | 
|  | 63 | %50 = fadd float %49, %34 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 64 | %51 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 65 | %52 = extractelement <4 x float> %51, i32 3 | 
|  | 66 | %53 = fmul float %52, %2 | 
|  | 67 | %54 = fadd float %53, %38 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 68 | %55 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 69 | %56 = extractelement <4 x float> %55, i32 0 | 
|  | 70 | %57 = fmul float %56, %3 | 
|  | 71 | %58 = fadd float %57, %42 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 72 | %59 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 73 | %60 = extractelement <4 x float> %59, i32 1 | 
|  | 74 | %61 = fmul float %60, %3 | 
|  | 75 | %62 = fadd float %61, %46 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 76 | %63 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 77 | %64 = extractelement <4 x float> %63, i32 2 | 
|  | 78 | %65 = fmul float %64, %3 | 
|  | 79 | %66 = fadd float %65, %50 | 
| David Blaikie | f72d05b | 2015-03-13 18:20:45 +0000 | [diff] [blame] | 80 | %67 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 81 | %68 = extractelement <4 x float> %67, i32 3 | 
|  | 82 | %69 = fmul float %68, %3 | 
|  | 83 | %70 = fadd float %69, %54 | 
; Pack the four accumulated sums into one vector and hand it to the store
; intrinsic with i32 operands (60, 1).
|  | 84 | %71 = insertelement <4 x float> undef, float %58, i32 0 | 
|  | 85 | %72 = insertelement <4 x float> %71, float %62, i32 1 | 
|  | 86 | %73 = insertelement <4 x float> %72, float %66, i32 2 | 
|  | 87 | %74 = insertelement <4 x float> %73, float %70, i32 3 | 
| Matt Arsenault | 82e5e1e | 2016-07-15 21:27:08 +0000 | [diff] [blame] | 88 | call void @llvm.r600.store.swizzle(<4 x float> %74, i32 60, i32 1) | 
; Pack the four merged temp values (%temp.0 .. %temp3.0) and hand them to the
; store intrinsic with i32 operands (0, 2).
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 89 | %75 = insertelement <4 x float> undef, float %temp.0, i32 0 | 
|  | 90 | %76 = insertelement <4 x float> %75, float %temp1.0, i32 1 | 
|  | 91 | %77 = insertelement <4 x float> %76, float %temp2.0, i32 2 | 
|  | 92 | %78 = insertelement <4 x float> %77, float %temp3.0, i32 3 | 
| Matt Arsenault | 82e5e1e | 2016-07-15 21:27:08 +0000 | [diff] [blame] | 93 | call void @llvm.r600.store.swizzle(<4 x float> %78, i32 0, i32 2) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 94 | ret void | 
|  | 95 |  | 
; Loop header. %temp4.0 starts at -2.0 and is the value tested by both exit
; conditions; %temp.1 .. %temp3.1 carry the values ENDIF later packs and
; stores. First exit: go to ENDIF when %temp4.0 >= %0 or the compare is
; unordered (fcmp uge), using the same i1 -> i32 idiom as main_body.
|  | 96 | LOOP:                                             ; preds = %main_body, %ENDIF19 | 
|  | 97 | %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ] | 
|  | 98 | %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ] | 
|  | 99 | %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ] | 
|  | 100 | %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ] | 
|  | 101 | %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ] | 
|  | 102 | %79 = fcmp uge float %temp4.0, %0 | 
|  | 103 | %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00 | 
|  | 104 | %81 = fsub float -0.000000e+00, %80 | 
|  | 105 | %82 = fptosi float %81 to i32 | 
|  | 106 | %83 = bitcast i32 %82 to float | 
|  | 107 | %84 = bitcast float %83 to i32 | 
|  | 108 | %85 = icmp ne i32 %84, 0 | 
|  | 109 | br i1 %85, label %ENDIF, label %ENDIF16 | 
|  | 110 |  | 
; Second exit: go to ENDIF when lane 2 of %reg1 differs from %temp4.0 or the
; compare is unordered (fcmp une); otherwise fall through to the latch.
|  | 111 | ENDIF16:                                          ; preds = %LOOP | 
|  | 112 | %86 = fcmp une float %2, %temp4.0 | 
|  | 113 | %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00 | 
|  | 114 | %88 = fsub float -0.000000e+00, %87 | 
|  | 115 | %89 = fptosi float %88 to i32 | 
|  | 116 | %90 = bitcast i32 %89 to float | 
|  | 117 | %91 = bitcast float %90 to i32 | 
|  | 118 | %92 = icmp ne i32 %91, 0 | 
|  | 119 | br i1 %92, label %ENDIF, label %ENDIF19 | 
|  | 120 |  | 
; Loop latch: add 1.0 to %temp.1 and %temp4.0, add 0.0 to the other three
; temps, then branch back to LOOP.
|  | 121 | ENDIF19:                                          ; preds = %ENDIF16 | 
|  | 122 | %93 = fadd float %temp.1, 1.000000e+00 | 
|  | 123 | %94 = fadd float %temp1.1, 0.000000e+00 | 
|  | 124 | %95 = fadd float %temp2.1, 0.000000e+00 | 
|  | 125 | %96 = fadd float %temp3.1, 0.000000e+00 | 
|  | 126 | %97 = fadd float %temp4.0, 1.000000e+00 | 
|  | 127 | br label %LOOP | 
|  | 128 | } | 
|  | 129 |  | 
; Target intrinsic called twice from @main. This file shows only its
; signature: one <4 x float> operand plus two i32 operands, returning void.
; NOTE(review): the two i32 operands are presumably an export index and an
; export type - confirm against the R600 intrinsic definitions.
| Matt Arsenault | 82e5e1e | 2016-07-15 21:27:08 +0000 | [diff] [blame] | 130 | declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) |