[WebAssembly] Enhanced register stackification

This patch revamps the RegStackifier pass with a new tree traversal mechanism,
enabling three major new features:

 - Stackification of values with multiple uses, using the result value of set_local
 - More aggressive stackification of instructions with side effects
 - Reordering operands in commutative instructions to enable more stackification.

llvm-svn: 259009
diff --git a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll
index 3b700e9..0083b41 100644
--- a/llvm/test/CodeGen/WebAssembly/reg-stackify.ll
+++ b/llvm/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -90,17 +90,18 @@
 }
 
 ; Test an interesting case where the load has multiple uses and cannot
-; be trivially stackified.
+; be trivially stackified. However, it can be stackified with a tee_local.
 
 ; CHECK-LABEL: multiple_uses:
 ; CHECK-NEXT: .param       i32, i32, i32{{$}}
 ; CHECK-NEXT: .local       i32{{$}}
-; CHECK-NEXT: i32.load    $3=, 0($2){{$}}
 ; CHECK-NEXT: block{{$}}
-; CHECK-NEXT: i32.ge_u    $push0=, $3, $1{{$}}
-; CHECK-NEXT: br_if       $pop0, 0{{$}}
-; CHECK-NEXT: i32.lt_u    $push1=, $3, $0{{$}}
+; CHECK-NEXT: i32.load    $push0=, 0($2){{$}}
+; CHECK-NEXT: tee_local   $push3=, $3=, $pop0{{$}}
+; CHECK-NEXT: i32.ge_u    $push1=, $pop3, $1{{$}}
 ; CHECK-NEXT: br_if       $pop1, 0{{$}}
+; CHECK-NEXT: i32.lt_u    $push2=, $3, $0{{$}}
+; CHECK-NEXT: br_if       $pop2, 0{{$}}
 ; CHECK-NEXT: i32.store   $discard=, 0($2), $3{{$}}
 ; CHECK-NEXT: .LBB5_3:
 ; CHECK-NEXT: end_block{{$}}
@@ -145,4 +146,102 @@
   ret void
 }
 
+; Div instructions have side effects and can't be reordered, but this entire
+; function should still be able to be stackified because it's already in
+; tree order.
+
+; CHECK-LABEL: div_tree:
+; CHECK-NEXT: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
+; CHECK-NEXT: .result     i32{{$}}
+; CHECK-NEXT: i32.div_s   $push0=, $0, $1
+; CHECK-NEXT: i32.div_s   $push1=, $2, $3
+; CHECK-NEXT: i32.div_s   $push2=, $pop0, $pop1
+; CHECK-NEXT: i32.div_s   $push3=, $4, $5
+; CHECK-NEXT: i32.div_s   $push4=, $6, $7
+; CHECK-NEXT: i32.div_s   $push5=, $pop3, $pop4
+; CHECK-NEXT: i32.div_s   $push6=, $pop2, $pop5
+; CHECK-NEXT: i32.div_s   $push7=, $8, $9
+; CHECK-NEXT: i32.div_s   $push8=, $10, $11
+; CHECK-NEXT: i32.div_s   $push9=, $pop7, $pop8
+; CHECK-NEXT: i32.div_s   $push10=, $12, $13
+; CHECK-NEXT: i32.div_s   $push11=, $14, $15
+; CHECK-NEXT: i32.div_s   $push12=, $pop10, $pop11
+; CHECK-NEXT: i32.div_s   $push13=, $pop9, $pop12
+; CHECK-NEXT: i32.div_s   $push14=, $pop6, $pop13
+; CHECK-NEXT: return      $pop14
+define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
+entry:
+  %div = sdiv i32 %a, %b
+  %div1 = sdiv i32 %c, %d
+  %div2 = sdiv i32 %div, %div1
+  %div3 = sdiv i32 %e, %f
+  %div4 = sdiv i32 %g, %h
+  %div5 = sdiv i32 %div3, %div4
+  %div6 = sdiv i32 %div2, %div5
+  %div7 = sdiv i32 %i, %j
+  %div8 = sdiv i32 %k, %l
+  %div9 = sdiv i32 %div7, %div8
+  %div10 = sdiv i32 %m, %n
+  %div11 = sdiv i32 %o, %p
+  %div12 = sdiv i32 %div10, %div11
+  %div13 = sdiv i32 %div9, %div12
+  %div14 = sdiv i32 %div6, %div13
+  ret i32 %div14
+}
+
+; A simple multiple-use case.
+
+; CHECK-LABEL: simple_multiple_use:
+; CHECK-NEXT:  .param      i32, i32{{$}}
+; CHECK-NEXT:  i32.mul     $push0=, $1, $0{{$}}
+; CHECK-NEXT:  tee_local   $push1=, $0=, $pop0{{$}}
+; CHECK-NEXT:  call        use_a@FUNCTION, $pop1{{$}}
+; CHECK-NEXT:  call        use_b@FUNCTION, $0{{$}}
+; CHECK-NEXT:  return{{$}}
+declare void @use_a(i32)
+declare void @use_b(i32)
+define void @simple_multiple_use(i32 %x, i32 %y) {
+  %mul = mul i32 %y, %x
+  call void @use_a(i32 %mul)
+  call void @use_b(i32 %mul)
+  ret void
+}
+
+; Multiple uses of the same value in one instruction.
+
+; CHECK-LABEL: multiple_uses_in_same_insn:
+; CHECK-NEXT:  .param      i32, i32{{$}}
+; CHECK-NEXT:  i32.mul     $push0=, $1, $0{{$}}
+; CHECK-NEXT:  tee_local   $push1=, $0=, $pop0{{$}}
+; CHECK-NEXT:  call        use_2@FUNCTION, $pop1, $0{{$}}
+; CHECK-NEXT:  return{{$}}
+declare void @use_2(i32, i32)
+define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
+  %mul = mul i32 %y, %x
+  call void @use_2(i32 %mul, i32 %mul)
+  ret void
+}
+
+; Commute operands to achieve better stackifying.
+
+; CHECK-LABEL: commute:
+; CHECK-NEXT:  .result     i32{{$}}
+; CHECK-NEXT:  i32.call    $push0=, red@FUNCTION{{$}}
+; CHECK-NEXT:  i32.call    $push1=, green@FUNCTION{{$}}
+; CHECK-NEXT:  i32.add     $push2=, $pop0, $pop1{{$}}
+; CHECK-NEXT:  i32.call    $push3=, blue@FUNCTION{{$}}
+; CHECK-NEXT:  i32.add     $push4=, $pop2, $pop3{{$}}
+; CHECK-NEXT:  return      $pop4{{$}}
+declare i32 @red()
+declare i32 @green()
+declare i32 @blue()
+define i32 @commute() {
+  %call = call i32 @red()
+  %call1 = call i32 @green()
+  %add = add i32 %call1, %call
+  %call2 = call i32 @blue()
+  %add3 = add i32 %add, %call2
+  ret i32 %add3
+}
+
 !0 = !{}