[RISCV] Custom-legalise 32-bit variable shifts on RV64

The previous DAG combiner-based approach had an issue with infinite loops
between the target-dependent and target-independent combiner logic (see
PR40333). Although this was worked around in rL351806, the combiner-based
approach is still potentially brittle and can fail to select the 32-bit shift
variant when profitable to do so, as demonstrated in the pr40333.ll test case.

This patch instead introduces target-specific SelectionDAG nodes for
SLLW/SRLW/SRAW and custom-lowers variable i32 shifts to them. pr40333.ll is a
good example of how this approach can improve codegen.

This adds a DAG combine that performs SimplifyDemandedBits on the operands
(only the lower 32 bits of the first operand and the lower 5 bits of the
second operand are read). This seems better than implementing
SimplifyDemandedBitsForTargetNode, as there is no guarantee that it would be
called (and it is not called for e.g. the anyext return test cases). This
patch also implements ComputeNumSignBitsForTargetNode.

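There are codegen changes in atomic-rmw.ll and atomic-cmpxchg.ll, but the new
instruction sequences are semantically equivalent: for example,
`andi a3, a0, 3; slli a3, a3, 3` becomes `slli a3, a0, 3; andi a3, a3, 24`,
and (x & 3) << 3 == (x << 3) & 24.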

Differential Revision: https://reviews.llvm.org/D57085

llvm-svn: 352169
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index 8818645..e31b45e 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -61,8 +61,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_monotonic_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -139,8 +139,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acquire_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -217,8 +217,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acquire_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -295,8 +295,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_release_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -373,8 +373,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_release_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -451,8 +451,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acq_rel_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -529,8 +529,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acq_rel_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -607,8 +607,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_seq_cst_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -685,8 +685,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_seq_cst_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -763,8 +763,8 @@
 ;
 ; RV64IA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -846,8 +846,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -926,8 +926,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1006,8 +1006,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1086,8 +1086,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1166,8 +1166,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1246,8 +1246,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1326,8 +1326,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1406,8 +1406,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1486,8 +1486,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1566,8 +1566,8 @@
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 1bd5e9e..fa874e6 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -52,8 +52,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -118,8 +118,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -184,8 +184,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -250,8 +250,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -316,8 +316,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -382,8 +382,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -448,8 +448,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -514,8 +514,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -580,8 +580,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -646,8 +646,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -712,8 +712,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -778,8 +778,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -844,8 +844,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -910,8 +910,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -976,8 +976,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1037,11 +1037,11 @@
 ; RV64IA-LABEL: atomicrmw_and_i8_monotonic:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1091,11 +1091,11 @@
 ; RV64IA-LABEL: atomicrmw_and_i8_acquire:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1145,11 +1145,11 @@
 ; RV64IA-LABEL: atomicrmw_and_i8_release:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1199,11 +1199,11 @@
 ; RV64IA-LABEL: atomicrmw_and_i8_acq_rel:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1253,11 +1253,11 @@
 ; RV64IA-LABEL: atomicrmw_and_i8_seq_cst:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1313,8 +1313,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1381,8 +1381,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1449,8 +1449,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1517,8 +1517,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1585,8 +1585,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1643,9 +1643,9 @@
 ; RV64IA-LABEL: atomicrmw_or_i8_monotonic:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1689,9 +1689,9 @@
 ; RV64IA-LABEL: atomicrmw_or_i8_acquire:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1735,9 +1735,9 @@
 ; RV64IA-LABEL: atomicrmw_or_i8_release:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1781,9 +1781,9 @@
 ; RV64IA-LABEL: atomicrmw_or_i8_acq_rel:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1827,9 +1827,9 @@
 ; RV64IA-LABEL: atomicrmw_or_i8_seq_cst:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1873,9 +1873,9 @@
 ; RV64IA-LABEL: atomicrmw_xor_i8_monotonic:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1919,9 +1919,9 @@
 ; RV64IA-LABEL: atomicrmw_xor_i8_acquire:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1965,9 +1965,9 @@
 ; RV64IA-LABEL: atomicrmw_xor_i8_release:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -2011,9 +2011,9 @@
 ; RV64IA-LABEL: atomicrmw_xor_i8_acq_rel:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -2057,9 +2057,9 @@
 ; RV64IA-LABEL: atomicrmw_xor_i8_seq_cst:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -3738,8 +3738,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -3880,8 +3880,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4022,8 +4022,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4170,8 +4170,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4312,8 +4312,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4448,8 +4448,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4590,8 +4590,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4732,8 +4732,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4880,8 +4880,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -5022,8 +5022,8 @@
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -5096,8 +5096,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5164,8 +5164,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5232,8 +5232,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5300,8 +5300,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5368,8 +5368,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5436,8 +5436,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5504,8 +5504,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5572,8 +5572,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5640,8 +5640,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5708,8 +5708,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5776,8 +5776,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5844,8 +5844,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5912,8 +5912,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5980,8 +5980,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6048,8 +6048,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6110,10 +6110,10 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6166,10 +6166,10 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6222,10 +6222,10 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6278,10 +6278,10 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6334,10 +6334,10 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6397,8 +6397,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6467,8 +6467,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6537,8 +6537,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6607,8 +6607,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6677,8 +6677,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6737,9 +6737,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6787,9 +6787,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6837,9 +6837,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6887,9 +6887,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6937,9 +6937,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6987,9 +6987,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7037,9 +7037,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7087,9 +7087,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7137,9 +7137,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7187,9 +7187,9 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -8900,8 +8900,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9052,8 +9052,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9204,8 +9204,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9362,8 +9362,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9514,8 +9514,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9660,8 +9660,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9812,8 +9812,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9964,8 +9964,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -10122,8 +10122,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -10274,8 +10274,8 @@
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
diff --git a/llvm/test/CodeGen/RISCV/pr40333.ll b/llvm/test/CodeGen/RISCV/pr40333.ll
index 3f7ae8d..79e24e3 100644
--- a/llvm/test/CodeGen/RISCV/pr40333.ll
+++ b/llvm/test/CodeGen/RISCV/pr40333.ll
@@ -7,17 +7,10 @@
 ; loop would be created in DAGCombine, converting ANY_EXTEND to SIGN_EXTEND
 ; and back again.
 
-; TODO: This test case is also an example of where it would be cheaper to
-; select SRLW, but the current lowering strategy fails to do so.
-
 define signext i8 @foo(i32 %a, i32 %b) nounwind {
 ; RV64I-LABEL: foo:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    srlw a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    ret