[CodeGen] Add support for multiple memory operands in MachineInstr::mayAlias

Summary:
To support all targets, the mayAlias member function needs to support instructions with multiple memory operands.

This revision also changes the order of the emitted instructions in some test cases.

Reviewers: efriedma, hfinkel, craig.topper, dmgreen

Reviewed By: efriedma

Subscribers: MatzeB, dmgreen, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80161
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 5613db1..d419629 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -19,11 +19,11 @@
 ; A53-NEXT:    mov x19, x8
 ; A53-NEXT:    mov w0, w1
 ; A53-NEXT:    mov w9, #256
+; A53-NEXT:    stp x2, x3, [x8, #32]
+; A53-NEXT:    mov x2, x8
 ; A53-NEXT:    str q0, [x19, #16]!
 ; A53-NEXT:    str w1, [x19]
 ; A53-NEXT:    mov w1, #4
-; A53-NEXT:    stp x2, x3, [x8, #32]
-; A53-NEXT:    mov x2, x8
 ; A53-NEXT:    str q0, [x8]
 ; A53-NEXT:    strh w9, [x8, #24]
 ; A53-NEXT:    str wzr, [x8, #20]
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 9942d6d..693f335 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -503,12 +503,12 @@
 ; CHECK-NEXT:    vmov.32 r3, d16[1]
 ; CHECK-NEXT:    vmov.32 r1, d16[0]
 ; CHECK-NEXT:    subs r12, r12, #1
+; CHECK-NEXT:    str r12, [r0, #12]
 ; CHECK-NEXT:    sbcs r2, r2, #0
+; CHECK-NEXT:    str r2, [r0, #8]
 ; CHECK-NEXT:    sbcs r3, r3, #0
 ; CHECK-NEXT:    sbc r1, r1, #0
 ; CHECK-NEXT:    stm r0, {r1, r3}
-; CHECK-NEXT:    str r2, [r0, #8]
-; CHECK-NEXT:    str r12, [r0, #12]
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
index 88b772c..1a2ad4d 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
@@ -9,7 +9,7 @@
 ; CHECK:       ********** MI Scheduling **********
 ; We need second, post-ra scheduling to have VLDM instruction combined from single-loads
 ; CHECK:       ********** MI Scheduling **********
-; CHECK:       VLDMDIA_UPD
+; CHECK:       SU(1):{{.*}}VLDMDIA_UPD
 ; CHECK:       rdefs left
 ; CHECK-NEXT:  Latency            : 6
 ; CHECK:       Successors:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
index c517f46..3007630 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
@@ -5,7 +5,7 @@
 ; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
 ; CHECK:       ********** MI Scheduling **********
 ; CHECK:       schedule starting
-; CHECK:       VSTMDIA_UPD
+; CHECK:       SU(2):{{.*}}VSTMDIA_UPD
 ; CHECK:       rdefs left
 ; CHECK-NEXT:  Latency            : 4
 ; CHECK:       Successors:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
index 5e9041c..f88bb47 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
@@ -5,7 +5,7 @@
 ; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
 ; CHECK:       ********** MI Scheduling **********
 ; CHECK:       schedule starting
-; CHECK:       VSTMDIA
+; CHECK:       SU(3):{{.*}}VSTMDIA
 ; CHECK:       rdefs left
 ; CHECK-NEXT:  Latency            : 2
 
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 111a587..02bd955 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1092,6 +1092,7 @@
 ; CHECK-NEXT:    ldrd lr, r10, [r12, #24]
 ; CHECK-NEXT:    vstrb.8 q0, [r11], #16
 ; CHECK-NEXT:    vldrw.u32 q0, [r8], #32
+; CHECK-NEXT:    strd r11, r1, [sp, #24] @ 8-byte Folded Spill
 ; CHECK-NEXT:    vldrw.u32 q1, [r8, #-28]
 ; CHECK-NEXT:    vmul.f32 q0, q0, r0
 ; CHECK-NEXT:    vldrw.u32 q6, [r8, #-24]
@@ -1103,13 +1104,12 @@
 ; CHECK-NEXT:    vfma.f32 q0, q4, r6
 ; CHECK-NEXT:    vldrw.u32 q3, [r8, #-8]
 ; CHECK-NEXT:    vfma.f32 q0, q5, r5
-; CHECK-NEXT:    vldrw.u32 q1, [r8, #-4]
-; CHECK-NEXT:    vfma.f32 q0, q2, r3
 ; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    vfma.f32 q0, q2, r3
+; CHECK-NEXT:    vldrw.u32 q1, [r8, #-4]
 ; CHECK-NEXT:    vfma.f32 q0, q3, lr
-; CHECK-NEXT:    strd r11, r1, [sp, #24] @ 8-byte Folded Spill
-; CHECK-NEXT:    vfma.f32 q0, q1, r10
 ; CHECK-NEXT:    cmp r0, #16
+; CHECK-NEXT:    vfma.f32 q0, q1, r10
 ; CHECK-NEXT:    blo .LBB16_7
 ; CHECK-NEXT:  @ %bb.5: @ %for.body.preheader
 ; CHECK-NEXT:    @ in Loop: Header=BB16_4 Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
index e7d6a73..0fe26fb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
@@ -168,16 +168,14 @@
 ; CHECK-NEXT:    vmov q1, q4
 ; CHECK-NEXT:    vmov s1, r7
 ; CHECK-NEXT:    vmov.32 q1[1], r6
-; CHECK-NEXT:    mov.w r10, #0
-; CHECK-NEXT:    vmov.32 q1[2], r5
 ; CHECK-NEXT:    vmov.32 q5[0], r7
+; CHECK-NEXT:    vmov.32 q1[2], r5
+; CHECK-NEXT:    vmov s9, r4
 ; CHECK-NEXT:    vmov.32 q1[3], r4
-; CHECK-NEXT:    strd r0, r10, [sp, #24]
+; CHECK-NEXT:    vdup.32 q6, r7
 ; CHECK-NEXT:    vstrw.32 q1, [sp, #76]
 ; CHECK-NEXT:    vmov q1, q5
-; CHECK-NEXT:    vmov s9, r4
 ; CHECK-NEXT:    vmov.32 q1[1], r7
-; CHECK-NEXT:    vdup.32 q6, r7
 ; CHECK-NEXT:    vmov.f32 s2, s1
 ; CHECK-NEXT:    vmov.f32 s8, s0
 ; CHECK-NEXT:    vmov.32 q1[2], r6
@@ -185,6 +183,7 @@
 ; CHECK-NEXT:    vmov q7, q6
 ; CHECK-NEXT:    vmov.f32 s10, s1
 ; CHECK-NEXT:    mov.w r8, #4
+; CHECK-NEXT:    mov.w r10, #0
 ; CHECK-NEXT:    vmov.32 q1[3], r4
 ; CHECK-NEXT:    vmov.32 q3[0], r4
 ; CHECK-NEXT:    vmov.32 q7[1], r4
@@ -192,6 +191,7 @@
 ; CHECK-NEXT:    vmov.f32 s11, s3
 ; CHECK-NEXT:    movs r1, #64
 ; CHECK-NEXT:    strh.w r8, [sp, #390]
+; CHECK-NEXT:    strd r0, r10, [sp, #24]
 ; CHECK-NEXT:    vstrw.32 q0, [sp, #44]
 ; CHECK-NEXT:    str r0, [r0]
 ; CHECK-NEXT:    vstrw.32 q2, [r0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 52de7a4..1f35029 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -24,8 +24,8 @@
 ; CHECK-NEXT:    vmov.f32 s9, s6
 ; CHECK-NEXT:    vmov.f32 s10, s0
 ; CHECK-NEXT:    vmov.f32 s11, s5
-; CHECK-NEXT:    strd r2, r0, [r1, #16]
 ; CHECK-NEXT:    vstrw.32 q2, [r1]
+; CHECK-NEXT:    strd r2, r0, [r1, #16]
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index ac1c814..f57c922 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -8,17 +8,17 @@
 ; THUMBV7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; THUMBV7-NEXT:    .pad #44
 ; THUMBV7-NEXT:    sub sp, #44
-; THUMBV7-NEXT:    ldrd r4, r7, [sp, #88]
-; THUMBV7-NEXT:    mov r5, r3
 ; THUMBV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
 ; THUMBV7-NEXT:    movs r0, #0
-; THUMBV7-NEXT:    strd r4, r7, [sp]
-; THUMBV7-NEXT:    mov r1, r3
+; THUMBV7-NEXT:    ldrd r4, r7, [sp, #88]
+; THUMBV7-NEXT:    mov r5, r3
 ; THUMBV7-NEXT:    strd r0, r0, [sp, #8]
+; THUMBV7-NEXT:    mov r1, r3
 ; THUMBV7-NEXT:    mov r6, r2
 ; THUMBV7-NEXT:    mov r0, r2
 ; THUMBV7-NEXT:    movs r2, #0
 ; THUMBV7-NEXT:    movs r3, #0
+; THUMBV7-NEXT:    strd r4, r7, [sp]
 ; THUMBV7-NEXT:    bl __multi3
 ; THUMBV7-NEXT:    strd r1, r0, [sp, #32] @ 8-byte Folded Spill
 ; THUMBV7-NEXT:    strd r3, r2, [sp, #24] @ 8-byte Folded Spill
diff --git a/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir b/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir
new file mode 100644
index 0000000..0259f42
--- /dev/null
+++ b/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir
@@ -0,0 +1,144 @@
+# RUN: llc -mtriple=i686-- -o - -run-pass=machine-scheduler -debug %s 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+--- |
+  %struct.Macroblock.0.1.2.3.6.17 = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock.0.1.2.3.6.17*, %struct.Macroblock.0.1.2.3.6.17*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+  
+  define void @stepsystem(i32 %x) {
+  entry:
+    %0 = load i32, i32* undef, align 8
+    %inc = add i32 %x, 1
+    store i32 %inc, i32* undef, align 8
+    store <2 x double> <double 0xD47D42AEA2879F2E, double 0xD47D42AEA2879F2E>, <2 x double>* undef, align 8
+    ret void
+  }
+  
+  define void @dct_chroma() {
+  cond_true2732.preheader:
+    %tmp2666 = getelementptr %struct.Macroblock.0.1.2.3.6.17, %struct.Macroblock.0.1.2.3.6.17* null, i32 0, i32 13
+    %tmp2667.us.us = load i64, i64* %tmp2666, align 4
+    %tmp2670.us.us = load i64, i64* null, align 4
+    %tmp2675.us.us = shl i64 %tmp2670.us.us, 0
+    %tmp2675not.us.us = xor i64 %tmp2675.us.us, -1
+    %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us
+    store i64 %tmp2676.us.us, i64* %tmp2666, align 4
+    ret void
+  }
+
+...
+---
+name:            stepsystem
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr32, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: default, 
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    %1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
+    %1:gr32 = INC32r %1, implicit-def dead $eflags
+    MOV32mr undef %2:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `i32* undef`, align 8)
+    MOV32mi undef %3:gr32, 1, $noreg, 0, $noreg, -729988434 :: (store 4 into `<2 x double>* undef` + 12)
+    MOV32mi undef %4:gr32, 1, $noreg, 0, $noreg, -1568170194 :: (store 4 into `<2 x double>* undef` + 8, align 8)
+    RET 0
+
+# CHECK-LABEL: stepsystem
+# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to:   MOV32mi {{.*}} :: (store 4 {{.*}})
+# CHECK: Adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to:   MOV32mr {{.*}} :: (store 4 {{.*}})
+...
+---
+name:            dct_chroma
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr32, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.cond_true2732.preheader:
+    %4:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i64* null`)
+    %2:gr32 = MOV32rm $noreg, 1, $noreg, 4, $noreg :: (load 4 from `i64* null` + 4)
+    %2:gr32 = NOT32r %2
+    %4:gr32 = NOT32r %4
+    %4:gr32 = AND32rm %4, $noreg, 1, $noreg, 356, $noreg, implicit-def dead $eflags :: (load 4 from %ir.tmp2666)
+    AND32mr $noreg, 1, $noreg, 360, $noreg, %2, implicit-def dead $eflags :: (store 4 into %ir.tmp2666 + 4), (load 4 from %ir.tmp2666 + 4)
+    MOV32mr $noreg, 1, $noreg, 356, $noreg, %4 :: (store 4 into %ir.tmp2666)
+    RET 0
+
+# Chain dependencies should not be added unconditionally when at least one of
+# the instructions has more than one memory operand. They should only be added
+# where they are actually needed.
+# CHECK-LABEL: dct_chroma
+# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mr {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to:   AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}})
+# CHECK: Adding chain dependency{{[[:space:]]*}}from: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}}){{[[:space:]]*}}to:   %{{.*}} = MOV32rm {{.*}} :: (load 4 {{.*}})
+
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
index 674b8d8..00db079 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
@@ -17,13 +17,12 @@
         store i64 %tmp2676.us.us, i64* %tmp2666
         ret i32 0
 
-; INTEL: 	and	{{e..}}, dword ptr [356]
 ; INTEL:	and	dword ptr [360], {{e..}}
-; FIXME:	mov	dword ptr [356], {{e..}}
-; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
+; INTEL: 	and	{{e..}}, dword ptr [356]
+; INTEL:	mov	dword ptr [356], {{e..}}
 
-; ATT: 	andl	356, %{{e..}}
 ; ATT:	andl	%{{e..}}, 360
+; ATT: 	andl	356, %{{e..}}
 ; ATT:	movl	%{{e..}}, 356
 
 }