[llvm][mlir] Promote the experimental reduction intrinsics to be first-class intrinsics.

This change renames the llvm.experimental.vector.reduce.* intrinsics to llvm.vector.reduce.*, dropping "experimental" from their names.

The autoupgrader handles bitcode and IR that still use the legacy intrinsic names, rewriting them to the new names.
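
For example (an illustrative sketch based on the v4i32 add reduction exercised in the tests below, not a line taken from this patch), a legacy call such as

  %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)

is upgraded on load to the first-class form

  %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)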

Relevant mailing list thread: http://lists.llvm.org/pipermail/llvm-dev/2020-April/140729.html

Differential Revision: https://reviews.llvm.org/D88787
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index f1e0f29..c037a35 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,16 +1,16 @@
 ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
 
 ; Function Attrs: nounwind readnone
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
 
 define i8 @add_B(<16 x i8>* %arr)  {
 ; CHECK-LABEL: add_B
 ; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
   %bin.rdx = load <16 x i8>, <16 x i8>* %arr
-  %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %bin.rdx)
+  %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %bin.rdx)
   ret i8 %r
 }
 
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: add_H
 ; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
   %bin.rdx = load <8 x i16>, <8 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %bin.rdx)
+  %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %bin.rdx)
   ret i16 %r
 }
 
@@ -26,7 +26,7 @@
 ; CHECK-LABEL: add_S
 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
   %bin.rdx = load <4 x i32>, <4 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
+  %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
   ret i32 %r
 }
 
@@ -35,11 +35,11 @@
 ; CHECK-NOT: addv
 ; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
   %bin.rdx = load <2 x i64>, <2 x i64>* %arr
-  %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %bin.rdx)
+  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %bin.rdx)
   ret i64 %r
 }
 
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
 
 define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {
 ; CHECK-LABEL: oversized_ADDV_256
@@ -55,16 +55,16 @@
   %7 = icmp slt <8 x i32> %6, zeroinitializer
   %8 = sub nsw <8 x i32> zeroinitializer, %6
   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
-  %r = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %9)
+  %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %9)
   ret i32 %r
 }
 
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
 
 define i32 @oversized_ADDV_512(<16 x i32>* %arr)  {
 ; CHECK-LABEL: oversized_ADDV_512
 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
   %bin.rdx = load <16 x i32>, <16 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
+  %r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
   ret i32 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index b6f95a7..4ff8029 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -2,28 +2,28 @@
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
 
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
 
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
 
 ; CHECK-LABEL: smax_B
 ; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @smax_B(<16 x i8>* nocapture readonly %arr)  {
   %arr.load = load <16 x i8>, <16 x i8>* %arr
-  %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %arr.load)
+  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
@@ -31,7 +31,7 @@
 ; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
   %arr.load = load <8 x i16>, <8 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
@@ -39,7 +39,7 @@
 ; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @smax_S(<4 x i32> * nocapture readonly %arr)  {
   %arr.load = load <4 x i32>, <4 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
@@ -47,7 +47,7 @@
 ; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @umax_B(<16 x i8>* nocapture readonly %arr)  {
   %arr.load = load <16 x i8>, <16 x i8>* %arr
-  %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %arr.load)
+  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
@@ -55,7 +55,7 @@
 ; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @umax_H(<8 x i16>* nocapture readonly %arr)  {
   %arr.load = load <8 x i16>, <8 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
@@ -63,7 +63,7 @@
 ; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
   %arr.load = load <4 x i32>, <4 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
@@ -71,7 +71,7 @@
 ; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
   %arr.load = load <16 x i8>, <16 x i8>* %arr
-  %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %arr.load)
+  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
@@ -79,7 +79,7 @@
 ; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
   %arr.load = load <8 x i16>, <8 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
@@ -87,7 +87,7 @@
 ; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
   %arr.load = load <4 x i32>, <4 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
@@ -95,7 +95,7 @@
 ; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
 define i8 @umin_B(<16 x i8>* nocapture readonly %arr)  {
   %arr.load = load <16 x i8>, <16 x i8>* %arr
-  %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %arr.load)
+  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arr.load)
   ret i8 %r
 }
 
@@ -103,7 +103,7 @@
 ; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
 define i16 @umin_H(<8 x i16>* nocapture readonly %arr)  {
   %arr.load = load <8 x i16>, <8 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arr.load)
   ret i16 %r
 }
 
@@ -111,7 +111,7 @@
 ; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
 define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
   %arr.load = load <4 x i32>, <4 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arr.load)
   ret i32 %r
 }
 
@@ -119,7 +119,7 @@
 ; CHECK: fmaxnmv
 define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
   %arr.load  = load <4 x float>, <4 x float>* %arr
-  %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %arr.load)
+  %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arr.load)
   ret float %r
 }
 
@@ -127,22 +127,22 @@
 ; CHECK: fminnmv
 define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
   %arr.load  = load <4 x float>, <4 x float>* %arr
-  %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %arr.load)
+  %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arr.load)
   ret float %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
 
 define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umax_256
 ; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: umaxv {{h[0-9]+}}, [[V0]]
   %arr.load = load <16 x i16>, <16 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
-declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
 
 define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umax_512
@@ -151,22 +151,22 @@
 ; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
 
 define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umin_256
 ; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: uminv {{h[0-9]+}}, [[V0]]
   %arr.load = load <16 x i16>, <16 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
-declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
 
 define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_umin_512
@@ -175,22 +175,22 @@
 ; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
 
 define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smax_256
 ; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: smaxv {{h[0-9]+}}, [[V0]]
   %arr.load = load <16 x i16>, <16 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
-declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
 
 define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smax_512
@@ -199,22 +199,22 @@
 ; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
 
 define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smin_256
 ; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
 ; CHECK: sminv {{h[0-9]+}}, [[V0]]
   %arr.load = load <16 x i16>, <16 x i16>* %arr
-  %r = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %arr.load)
+  %r = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %arr.load)
   ret i16 %r
 }
 
-declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
 
 define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr)  {
 ; CHECK-LABEL: oversized_smin_512
@@ -223,6 +223,6 @@
 ; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 ; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
   %arr.load = load <16 x i32>, <16 x i32>* %arr
-  %r = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %arr.load)
+  %r = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %arr.load)
   ret i32 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 419cafc..23eddd4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -141,7 +141,7 @@
   ret <2 x i64> %tmp4
 }
 
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
 
 define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) {
 ; CHECK-LABEL: uabdl8h_rdx
@@ -155,11 +155,11 @@
   %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
   %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
   %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
-  %reduced_v = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %absel)
+  %reduced_v = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %absel)
   ret i16 %reduced_v
 }
 
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
 
 define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) {
 ; CHECK-LABEL: uabdl4s_rdx
@@ -173,11 +173,11 @@
   %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
   %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
   %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
-  %reduced_v = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %absel)
+  %reduced_v = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %absel)
   ret i32 %reduced_v
 }
 
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
 
 define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
 ; CHECK: uabdl2d_rdx
@@ -191,7 +191,7 @@
   %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
   %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
   %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
-  %reduced_v = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %absel)
+  %reduced_v = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %absel)
   ret i64 %reduced_v
 }
 
diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
index eef89ab..6743554 100644
--- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
@@ -205,7 +205,7 @@
   ret void
 }
 
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
 
 define i32 @test_udot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) {
 entry:
@@ -218,7 +218,7 @@
   %4 = load <8 x i8>, <8 x i8>* %3
   %5 = zext <8 x i8> %4 to <8 x i32>
   %6 = mul nuw nsw <8 x i32> %5, %2
-  %7 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %6)
+  %7 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %6)
   ret i32 %7
 }
 
@@ -233,11 +233,11 @@
   %4 = load <8 x i8>, <8 x i8>* %3
   %5 = sext <8 x i8> %4 to <8 x i32>
   %6 = mul nsw <8 x i32> %5, %2
-  %7 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %6)
+  %7 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %6)
   ret i32 %7
 }
 
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
 
 define i32 @test_udot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) {
 entry:
@@ -250,7 +250,7 @@
   %4 = load <16 x i8>, <16 x i8>* %3
   %5 = zext <16 x i8> %4 to <16 x i32>
   %6 = mul nuw nsw <16 x i32> %5, %2
-  %7 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %6)
+  %7 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6)
   %op.extra = add i32 %7, %sum
   ret i32 %op.extra
 }
@@ -265,7 +265,7 @@
   %0 = bitcast i8* %a1 to <16 x i8>*
   %1 = load <16 x i8>, <16 x i8>* %0
   %2 = zext <16 x i8> %1 to <16 x i32>
-  %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2)
+  %3 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %2)
   ret i32 %3
 }
 
@@ -280,7 +280,7 @@
   %4 = load <16 x i8>, <16 x i8>* %3
   %5 = sext <16 x i8> %4 to <16 x i32>
   %6 = mul nsw <16 x i32> %5, %2
-  %7 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %6)
+  %7 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %6)
   %op.extra = add nsw i32 %7, %sum
   ret i32 %op.extra
 }
@@ -295,6 +295,6 @@
   %0 = bitcast i8* %a1 to <16 x i8>*
   %1 = load <16 x i8>, <16 x i8>* %0
   %2 = sext <16 x i8> %1 to <16 x i32>
-  %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2)
+  %3 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %2)
   ret i32 %3
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
index 68501a7..10aca25 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: fmaxv_v4f16:
 ; CHECK: fmaxnmv h0, v0.4h
 ; CHECK-NEXT: ret
-  %res = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a)
+  %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
   ret half %res
 }
 
@@ -38,7 +38,7 @@
 ; CHECK-LABEL: fmaxv_v8f16:
 ; CHECK: fmaxnmv h0, v0.8h
 ; CHECK-NEXT: ret
-  %res = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %a)
+  %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
   ret half %res
 }
 
@@ -49,7 +49,7 @@
 ; VBITS_GE_256-NEXT: fmaxnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x half>, <16 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %op)
+  %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
   ret half %res
 }
 
@@ -60,7 +60,7 @@
 ; VBITS_GE_512-NEXT: fmaxnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_512-NEXT: ret
   %op = load <32 x half>, <32 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmax.v32f16(<32 x half> %op)
+  %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op)
   ret half %res
 }
 
@@ -71,7 +71,7 @@
 ; VBITS_GE_1024-NEXT: fmaxnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x half>, <64 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmax.v64f16(<64 x half> %op)
+  %res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op)
   ret half %res
 }
 
@@ -82,7 +82,7 @@
 ; VBITS_GE_2048-NEXT: fmaxnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x half>, <128 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmax.v128f16(<128 x half> %op)
+  %res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op)
   ret half %res
 }
 
@@ -91,7 +91,7 @@
 ; CHECK-LABEL: fmaxv_v2f32:
 ; CHECK: fmaxnmp s0, v0.2s
 ; CHECK: ret
-  %res = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a)
+  %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
   ret float %res
 }
 
@@ -100,7 +100,7 @@
 ; CHECK-LABEL: fmaxv_v4f32:
 ; CHECK: fmaxnmv s0, v0.4s
 ; CHECK: ret
-  %res = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a)
+  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
   ret float %res
 }
 
@@ -111,7 +111,7 @@
 ; VBITS_GE_256-NEXT: fmaxnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x float>, <8 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %op)
+  %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
   ret float %res
 }
 
@@ -122,7 +122,7 @@
 ; VBITS_GE_512-NEXT: fmaxnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_512-NEXT: ret
   %op = load <16 x float>, <16 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %op)
+  %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op)
   ret float %res
 }
 
@@ -133,7 +133,7 @@
 ; VBITS_GE_1024-NEXT: fmaxnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x float>, <32 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> %op)
+  %res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op)
   ret float %res
 }
 
@@ -144,7 +144,7 @@
 ; VBITS_GE_2048-NEXT: fmaxnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x float>, <64 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmax.v64f32(<64 x float> %op)
+  %res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op)
   ret float %res
 }
 
@@ -153,7 +153,7 @@
 ; CHECK-LABEL: fmaxv_v1f64:
 ; CHECK-NOT: fmax
 ; CHECK: ret
-  %res = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
+  %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
   ret double %res
 }
 
@@ -162,7 +162,7 @@
 ; CHECK-LABEL: fmaxv_v2f64:
 ; CHECK: fmaxnmp d0, v0.2d
 ; CHECK-NEXT: ret
-  %res = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a)
+  %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
   ret double %res
 }
 
@@ -173,7 +173,7 @@
 ; VBITS_GE_256-NEXT: fmaxnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x double>, <4 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %op)
+  %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
   ret double %res
 }
 
@@ -184,7 +184,7 @@
 ; VBITS_GE_512-NEXT: fmaxnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_512-NEXT: ret
   %op = load <8 x double>, <8 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %op)
+  %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op)
   ret double %res
 }
 
@@ -195,7 +195,7 @@
 ; VBITS_GE_1024-NEXT: fmaxnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x double>, <16 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %op)
+  %res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op)
   ret double %res
 }
 
@@ -206,7 +206,7 @@
 ; VBITS_GE_2048-NEXT: fmaxnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x double>, <32 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmax.v32f64(<32 x double> %op)
+  %res = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %op)
   ret double %res
 }
 
@@ -219,7 +219,7 @@
 ; CHECK-LABEL: fminv_v4f16:
 ; CHECK: fminnmv h0, v0.4h
 ; CHECK-NEXT: ret
-  %res = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a)
+  %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
   ret half %res
 }
 
@@ -228,7 +228,7 @@
 ; CHECK-LABEL: fminv_v8f16:
 ; CHECK: fminnmv h0, v0.8h
 ; CHECK-NEXT: ret
-  %res = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %a)
+  %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
   ret half %res
 }
 
@@ -239,7 +239,7 @@
 ; VBITS_GE_256-NEXT: fminnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x half>, <16 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %op)
+  %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
   ret half %res
 }
 
@@ -250,7 +250,7 @@
 ; VBITS_GE_512-NEXT: fminnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_512-NEXT: ret
   %op = load <32 x half>, <32 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmin.v32f16(<32 x half> %op)
+  %res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op)
   ret half %res
 }
 
@@ -261,7 +261,7 @@
 ; VBITS_GE_1024-NEXT: fminnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x half>, <64 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmin.v64f16(<64 x half> %op)
+  %res = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %op)
   ret half %res
 }
 
@@ -272,7 +272,7 @@
 ; VBITS_GE_2048-NEXT: fminnmv h0, [[PG]], [[OP]].h
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x half>, <128 x half>* %a
-  %res = call half @llvm.experimental.vector.reduce.fmin.v128f16(<128 x half> %op)
+  %res = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %op)
   ret half %res
 }
 
@@ -281,7 +281,7 @@
 ; CHECK-LABEL: fminv_v2f32:
 ; CHECK: fminnmp s0, v0.2s
 ; CHECK: ret
-  %res = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a)
+  %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
   ret float %res
 }
 
@@ -290,7 +290,7 @@
 ; CHECK-LABEL: fminv_v4f32:
 ; CHECK: fminnmv s0, v0.4s
 ; CHECK: ret
-  %res = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a)
+  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
   ret float %res
 }
 
@@ -301,7 +301,7 @@
 ; VBITS_GE_256-NEXT: fminnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x float>, <8 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %op)
+  %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
   ret float %res
 }
 
@@ -312,7 +312,7 @@
 ; VBITS_GE_512-NEXT: fminnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_512-NEXT: ret
   %op = load <16 x float>, <16 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %op)
+  %res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op)
   ret float %res
 }
 
@@ -323,7 +323,7 @@
 ; VBITS_GE_1024-NEXT: fminnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x float>, <32 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> %op)
+  %res = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %op)
   ret float %res
 }
 
@@ -334,7 +334,7 @@
 ; VBITS_GE_2048-NEXT: fminnmv s0, [[PG]], [[OP]].s
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x float>, <64 x float>* %a
-  %res = call float @llvm.experimental.vector.reduce.fmin.v64f32(<64 x float> %op)
+  %res = call float @llvm.vector.reduce.fmin.v64f32(<64 x float> %op)
   ret float %res
 }
 
@@ -343,7 +343,7 @@
 ; CHECK-LABEL: fminv_v1f64:
 ; CHECK-NOT: fmin
 ; CHECK: ret
-  %res = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a)
+  %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
   ret double %res
 }
 
@@ -352,7 +352,7 @@
 ; CHECK-LABEL: fminv_v2f64:
 ; CHECK: fminnmp d0, v0.2d
 ; CHECK-NEXT: ret
-  %res = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a)
+  %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
   ret double %res
 }
 
@@ -363,7 +363,7 @@
 ; VBITS_GE_256-NEXT: fminnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x double>, <4 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %op)
+  %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
   ret double %res
 }
 
@@ -374,7 +374,7 @@
 ; VBITS_GE_512-NEXT: fminnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_512-NEXT: ret
   %op = load <8 x double>, <8 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %op)
+  %res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op)
   ret double %res
 }
 
@@ -385,7 +385,7 @@
 ; VBITS_GE_1024-NEXT: fminnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x double>, <16 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %op)
+  %res = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %op)
   ret double %res
 }
 
@@ -396,50 +396,50 @@
 ; VBITS_GE_2048-NEXT: fminnmv d0, [[PG]], [[OP]].d
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x double>, <32 x double>* %a
-  %res = call double @llvm.experimental.vector.reduce.fmin.v32f64(<32 x double> %op)
+  %res = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %op)
   ret double %res
 }
 
 attributes #0 = { "target-features"="+sve" }
 
-declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v32f16(<32 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v64f16(<64 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v128f16(<128 x half>)
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
+declare half @llvm.vector.reduce.fmax.v32f16(<32 x half>)
+declare half @llvm.vector.reduce.fmax.v64f16(<64 x half>)
+declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>)
 
-declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v64f32(<64 x float>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>)
+declare float @llvm.vector.reduce.fmax.v64f32(<64 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v32f64(<32 x double>)
+declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)
 
-declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v32f16(<32 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v64f16(<64 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v128f16(<128 x half>)
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
+declare half @llvm.vector.reduce.fmin.v32f16(<32 x half>)
+declare half @llvm.vector.reduce.fmin.v64f16(<64 x half>)
+declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>)
 
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v64f32(<64 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>)
+declare float @llvm.vector.reduce.fmin.v64f32(<64 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v32f64(<32 x double>)
+declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
index 633fe19..4967f53 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: uaddv_v8i8:
 ; CHECK: addv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -38,7 +38,7 @@
 ; CHECK-LABEL: uaddv_v16i8:
 ; CHECK: addv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -50,7 +50,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -72,7 +72,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -84,7 +84,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -96,7 +96,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.add.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -105,7 +105,7 @@
 ; CHECK-LABEL: uaddv_v4i16:
 ; CHECK: addv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -114,7 +114,7 @@
 ; CHECK-LABEL: uaddv_v8i16:
 ; CHECK: addv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -126,7 +126,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -148,7 +148,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -160,7 +160,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -172,7 +172,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.add.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -181,7 +181,7 @@
 ; CHECK-LABEL: uaddv_v2i32:
 ; CHECK: addp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -190,7 +190,7 @@
 ; CHECK-LABEL: uaddv_v4i32:
 ; CHECK: addv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -202,7 +202,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -224,7 +224,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -236,7 +236,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -248,7 +248,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.add.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -257,7 +257,7 @@
 ; CHECK-LABEL: uaddv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -266,7 +266,7 @@
 ; CHECK-LABEL: uaddv_v2i64:
 ; CHECK: addp d0, v0.2d
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -278,7 +278,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -300,7 +300,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -312,7 +312,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -324,7 +324,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.add.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -337,7 +337,7 @@
 ; CHECK-LABEL: smaxv_v8i8:
 ; CHECK: smaxv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -346,7 +346,7 @@
 ; CHECK-LABEL: smaxv_v16i8:
 ; CHECK: smaxv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -358,7 +358,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -380,7 +380,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -392,7 +392,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -404,7 +404,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smax.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smax.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -413,7 +413,7 @@
 ; CHECK-LABEL: smaxv_v4i16:
 ; CHECK: smaxv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -422,7 +422,7 @@
 ; CHECK-LABEL: smaxv_v8i16:
 ; CHECK: smaxv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -434,7 +434,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -456,7 +456,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -468,7 +468,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -480,7 +480,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smax.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -489,7 +489,7 @@
 ; CHECK-LABEL: smaxv_v2i32:
 ; CHECK: smaxp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -498,7 +498,7 @@
 ; CHECK-LABEL: smaxv_v4i32:
 ; CHECK: smaxv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -510,7 +510,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -532,7 +532,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -544,7 +544,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -556,7 +556,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smax.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -565,7 +565,7 @@
 ; CHECK-LABEL: smaxv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -576,7 +576,7 @@
 ; CHECK-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -588,7 +588,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -610,7 +610,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -622,7 +622,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -634,7 +634,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smax.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -647,7 +647,7 @@
 ; CHECK-LABEL: sminv_v8i8:
 ; CHECK: sminv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -656,7 +656,7 @@
 ; CHECK-LABEL: sminv_v16i8:
 ; CHECK: sminv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -668,7 +668,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -690,7 +690,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -702,7 +702,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -714,7 +714,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.smin.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.smin.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -723,7 +723,7 @@
 ; CHECK-LABEL: sminv_v4i16:
 ; CHECK: sminv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -732,7 +732,7 @@
 ; CHECK-LABEL: sminv_v8i16:
 ; CHECK: sminv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -744,7 +744,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -766,7 +766,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -778,7 +778,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -790,7 +790,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.smin.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -799,7 +799,7 @@
 ; CHECK-LABEL: sminv_v2i32:
 ; CHECK: minp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -808,7 +808,7 @@
 ; CHECK-LABEL: sminv_v4i32:
 ; CHECK: sminv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -820,7 +820,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -842,7 +842,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -854,7 +854,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -866,7 +866,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.smin.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -875,7 +875,7 @@
 ; CHECK-LABEL: sminv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -886,7 +886,7 @@
 ; CHECK-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -898,7 +898,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -920,7 +920,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -932,7 +932,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -944,7 +944,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.smin.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -957,7 +957,7 @@
 ; CHECK-LABEL: umaxv_v8i8:
 ; CHECK: umaxv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -966,7 +966,7 @@
 ; CHECK-LABEL: umaxv_v16i8:
 ; CHECK: umaxv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -978,7 +978,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -1000,7 +1000,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -1012,7 +1012,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -1024,7 +1024,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umax.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umax.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -1033,7 +1033,7 @@
 ; CHECK-LABEL: umaxv_v4i16:
 ; CHECK: umaxv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -1042,7 +1042,7 @@
 ; CHECK-LABEL: umaxv_v8i16:
 ; CHECK: umaxv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -1054,7 +1054,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -1076,7 +1076,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -1088,7 +1088,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -1100,7 +1100,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umax.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -1109,7 +1109,7 @@
 ; CHECK-LABEL: umaxv_v2i32:
 ; CHECK: umaxp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -1118,7 +1118,7 @@
 ; CHECK-LABEL: umaxv_v4i32:
 ; CHECK: umaxv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -1130,7 +1130,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -1152,7 +1152,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -1164,7 +1164,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -1176,7 +1176,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umax.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -1185,7 +1185,7 @@
 ; CHECK-LABEL: umaxv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -1196,7 +1196,7 @@
 ; CHECK-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -1208,7 +1208,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -1230,7 +1230,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -1242,7 +1242,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -1254,7 +1254,7 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umax.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
@@ -1267,7 +1267,7 @@
 ; CHECK-LABEL: uminv_v8i8:
 ; CHECK: uminv b0, v0.8b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
   ret i8 %res
 }
 
@@ -1276,7 +1276,7 @@
 ; CHECK-LABEL: uminv_v16i8:
 ; CHECK: uminv b0, v0.16b
 ; CHECK: ret
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a)
+  %res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
   ret i8 %res
 }
 
@@ -1288,7 +1288,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <32 x i8>, <32 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op)
   ret i8 %res
 }
 
@@ -1310,7 +1310,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <64 x i8>, <64 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> %op)
   ret i8 %res
 }
 
@@ -1322,7 +1322,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <128 x i8>, <128 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> %op)
   ret i8 %res
 }
 
@@ -1334,7 +1334,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <256 x i8>, <256 x i8>* %a
-  %res = call i8 @llvm.experimental.vector.reduce.umin.v256i8(<256 x i8> %op)
+  %res = call i8 @llvm.vector.reduce.umin.v256i8(<256 x i8> %op)
   ret i8 %res
 }
 
@@ -1343,7 +1343,7 @@
 ; CHECK-LABEL: uminv_v4i16:
 ; CHECK: uminv h0, v0.4h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
   ret i16 %res
 }
 
@@ -1352,7 +1352,7 @@
 ; CHECK-LABEL: uminv_v8i16:
 ; CHECK: uminv h0, v0.8h
 ; CHECK: ret
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %a)
+  %res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
   ret i16 %res
 }
 
@@ -1364,7 +1364,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <16 x i16>, <16 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op)
   ret i16 %res
 }
 
@@ -1386,7 +1386,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <32 x i16>, <32 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> %op)
   ret i16 %res
 }
 
@@ -1398,7 +1398,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <64 x i16>, <64 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> %op)
   ret i16 %res
 }
 
@@ -1410,7 +1410,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <128 x i16>, <128 x i16>* %a
-  %res = call i16 @llvm.experimental.vector.reduce.umin.v128i16(<128 x i16> %op)
+  %res = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> %op)
   ret i16 %res
 }
 
@@ -1419,7 +1419,7 @@
 ; CHECK-LABEL: uminv_v2i32:
 ; CHECK: uminp v0.2s, v0.2s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
   ret i32 %res
 }
 
@@ -1428,7 +1428,7 @@
 ; CHECK-LABEL: uminv_v4i32:
 ; CHECK: uminv s0, v0.4s
 ; CHECK: ret
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a)
+  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
   ret i32 %res
 }
 
@@ -1440,7 +1440,7 @@
 ; VBITS_GE_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <8 x i32>, <8 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op)
   ret i32 %res
 }
 
@@ -1462,7 +1462,7 @@
 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <16 x i32>, <16 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %op)
   ret i32 %res
 }
 
@@ -1474,7 +1474,7 @@
 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <32 x i32>, <32 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> %op)
   ret i32 %res
 }
 
@@ -1486,7 +1486,7 @@
 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <64 x i32>, <64 x i32>* %a
-  %res = call i32 @llvm.experimental.vector.reduce.umin.v64i32(<64 x i32> %op)
+  %res = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> %op)
   ret i32 %res
 }
 
@@ -1495,7 +1495,7 @@
 ; CHECK-LABEL: uminv_v1i64:
 ; CHECK: fmov x0, d0
 ; CHECK: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> %a)
   ret i64 %res
 }
 
@@ -1506,7 +1506,7 @@
 ; CHECK-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 ; CHECK-NEXT: ret
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %a)
+  %res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
   ret i64 %res
 }
 
@@ -1518,7 +1518,7 @@
 ; VBITS_GE_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_256-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op)
   ret i64 %res
 }
 
@@ -1540,7 +1540,7 @@
 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_EQ_256-NEXT: ret
   %op = load <8 x i64>, <8 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %op)
   ret i64 %res
 }
 
@@ -1552,7 +1552,7 @@
 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_1024-NEXT: ret
   %op = load <16 x i64>, <16 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %op)
   ret i64 %res
 }
 
@@ -1564,148 +1564,148 @@
 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 ; VBITS_GE_2048-NEXT: ret
   %op = load <32 x i64>, <32 x i64>* %a
-  %res = call i64 @llvm.experimental.vector.reduce.umin.v32i64(<32 x i64> %op)
+  %res = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> %op)
   ret i64 %res
 }
 
 attributes #0 = { "target-features"="+sve" }
 
-declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.add.v256i8(<256 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smax.v256i8(<256 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smax.v128i16(<128 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smax.v64i32(<64 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smax.v32i64(<32 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smin.v256i8(<256 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smin.v128i16(<128 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smin.v64i32(<64 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smin.v32i64(<32 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umax.v256i8(<256 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umax.v128i16(<128 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umax.v64i32(<64 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umax.v32i64(<32 x i64>)
 
-declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v256i8(<256 x i8>)
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umin.v256i8(<256 x i8>)
 
-declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v128i16(<128 x i16>)
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umin.v128i16(<128 x i16>)
 
-declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v64i32(<64 x i32>)
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umin.v64i32(<64 x i32>)
 
-declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v32i64(<32 x i64>)
+declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umin.v32i64(<32 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
index 2f89987..36ec218 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -1,28 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
+declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %a)
+declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
+declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
+declare i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %a)
+declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
+declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
+declare i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
 
-declare i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
+declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
+declare i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %a)
+declare i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
+declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
+declare i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %a)
+declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
 
 define i1 @test_v1i1(<1 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v1i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w0, w0, #0x1
 ; CHECK-NEXT:    ret
-  %b = call i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %a)
   ret i1 %b
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
   ret i8 %b
 }
 
@@ -42,7 +42,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
-  %b = call i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
+  %b = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
   ret i16 %b
 }
 
@@ -50,7 +50,7 @@
 ; CHECK-LABEL: test_v1i24:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %a)
   ret i24 %b
 }
 
@@ -60,7 +60,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
@@ -70,7 +70,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
-  %b = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
+  %b = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
   ret i64 %b
 }
 
@@ -78,7 +78,7 @@
 ; CHECK-LABEL: test_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
   ret i128 %b
 }
 
@@ -92,7 +92,7 @@
 ; CHECK-NEXT:    addv h0, v0.4h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
   ret i8 %b
 }
 
@@ -109,7 +109,7 @@
 ; CHECK-NEXT:    addv b0, v0.16b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %a)
   ret i8 %b
 }
 
@@ -120,7 +120,7 @@
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
   ret i32 %b
 }
 
@@ -131,7 +131,7 @@
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
-  %b = call i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -141,7 +141,7 @@
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %a)
   ret i24 %b
 }
 
@@ -151,7 +151,7 @@
 ; CHECK-NEXT:    adds x0, x0, x2
 ; CHECK-NEXT:    adcs x1, x1, x3
 ; CHECK-NEXT:    ret
-  %b = call i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
   ret i128 %b
 }
 
@@ -164,6 +164,6 @@
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
   ret i32 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 43b6f52..99d55b3 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -1,28 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
+declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
+declare i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
+declare i24 @llvm.vector.reduce.and.v1i24(<1 x i24> %a)
+declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
+declare i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)
 
-declare i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
+declare i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
+declare i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
+declare i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+declare i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
+declare i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)
 
 define i1 @test_v1i1(<1 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v1i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w0, w0, #0x1
 ; CHECK-NEXT:    ret
-  %b = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
   ret i1 %b
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
   ret i8 %b
 }
 
@@ -42,7 +42,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
-  %b = call i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
+  %b = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
   ret i16 %b
 }
 
@@ -50,7 +50,7 @@
 ; CHECK-LABEL: test_v1i24:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.and.v1i24(<1 x i24> %a)
   ret i24 %b
 }
 
@@ -60,7 +60,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
@@ -70,7 +70,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
-  %b = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
+  %b = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
   ret i64 %b
 }
 
@@ -78,7 +78,7 @@
 ; CHECK-LABEL: test_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)
   ret i128 %b
 }
 
@@ -89,7 +89,7 @@
 ; CHECK-NEXT:    and w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, #0xff
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
   ret i8 %b
 }
 
@@ -120,7 +120,7 @@
 ; CHECK-NEXT:    umov w9, v0.b[7]
 ; CHECK-NEXT:    and w0, w8, w9
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
   ret i8 %b
 }
 
@@ -133,7 +133,7 @@
 ; CHECK-NEXT:    fmov w9, s1
 ; CHECK-NEXT:    and w0, w9, w8
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
   ret i32 %b
 }
 
@@ -150,7 +150,7 @@
 ; CHECK-NEXT:    and w8, w9, w8
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
-  %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -163,7 +163,7 @@
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    and w0, w9, w8
 ; CHECK-NEXT:    ret
-  %b = call i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
   ret i24 %b
 }
 
@@ -173,7 +173,7 @@
 ; CHECK-NEXT:    and x0, x0, x2
 ; CHECK-NEXT:    and x1, x1, x3
 ; CHECK-NEXT:    ret
-  %b = call i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
   ret i128 %b
 }
 
@@ -189,6 +189,6 @@
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    and w0, w9, w8
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)
   ret i32 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index e404159..1b5692c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
 
-declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
 
 define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_and_v1:
@@ -24,7 +24,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, lt
 ; CHECK-NEXT:    ret
   %x = icmp slt <1 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <2 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -58,7 +58,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <4 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -73,7 +73,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <8 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -88,7 +88,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <16 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -105,7 +105,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <32 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -119,7 +119,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, lt
 ; CHECK-NEXT:    ret
   %x = icmp slt <1 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -136,7 +136,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <2 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -153,7 +153,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <4 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -168,7 +168,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <8 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -183,7 +183,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <16 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
@@ -200,7 +200,7 @@
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret
   %x = icmp slt <32 x i8> %a0, zeroinitializer
-  %y = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %x)
+  %y = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %x)
   %z = select i1 %y, i32 %a1, i32 %a2
   ret i32 %z
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
index 5d6f2e4..12c1664 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -3,14 +3,14 @@
 
 ; Same as vecreduce-fadd-legalization.ll, but without fmf.
 
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
@@ -20,7 +20,7 @@
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    fcvt h0, s0
 ; CHECK-NEXT:    ret
-  %b = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -31,7 +31,7 @@
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
   ret float %b
 }
 
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    fmov d1, xzr
 ; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    ret
-  %b = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
+  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
   ret double %b
 }
 
@@ -54,7 +54,7 @@
 ; CHECK-NEXT:    bl __addtf3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    fadd s0, s1, s0
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
   ret float %b
 }
 
@@ -86,7 +86,7 @@
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -123,6 +123,6 @@
 ; CHECK-NEXT:    mov s1, v3.s[3]
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
index 1168614..e098c08 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call fast nnan half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call fast nnan half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -24,7 +24,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
-  %b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
+  %b = call fast nnan float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
   ret float %b
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call fast nnan double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
+  %b = call fast nnan double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
   ret double %b
 }
 
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: test_v1f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call fast nnan fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
+  %b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -53,7 +53,7 @@
 ; CHECK-NEXT:    fadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
-  %b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
+  %b = call fast nnan float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
   ret float %b
 }
 
@@ -64,7 +64,7 @@
 ; CHECK-NEXT:    bl __addtf3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %b = call fast nnan fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -78,6 +78,6 @@
 ; CHECK-NEXT:    fadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
-  %b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
+  %b = call fast nnan float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 9036737..7c5e50a 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -14,7 +14,7 @@
 ; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    faddp s0, v0.2s
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx)
   ret float %r
 }
 
@@ -48,7 +48,7 @@
 ; CHECKNOFP16-NEXT:    fadd s0, s0, s1
 ; CHECKNOFP16-NEXT:    fcvt h0, s0
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx)
+  %r = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx)
   ret half %r
 }
 
@@ -103,7 +103,7 @@
 ; CHECKNOFP16-NEXT:    fadd s0, s0, s1
 ; CHECKNOFP16-NEXT:    fcvt h0, s0
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx)
+  %r = call fast half @llvm.vector.reduce.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx)
   ret half %r
 }
 
@@ -121,7 +121,7 @@
 ; CHECKNOFP16-NEXT:    fadd v0.2s, v0.2s, v1.2s
 ; CHECKNOFP16-NEXT:    faddp s0, v0.2s
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx)
   ret float %r
 }
 
@@ -135,7 +135,7 @@
 ; CHECKNOFP16:       // %bb.0:
 ; CHECKNOFP16-NEXT:    faddp d0, v0.2d
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx)
+  %r = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx)
   ret double %r
 }
 
@@ -229,7 +229,7 @@
 ; CHECKNOFP16-NEXT:    fadd s0, s1, s0
 ; CHECKNOFP16-NEXT:    fcvt h0, s0
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx)
+  %r = call fast half @llvm.vector.reduce.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx)
   ret half %r
 }
 
@@ -249,7 +249,7 @@
 ; CHECKNOFP16-NEXT:    fadd v0.2s, v0.2s, v1.2s
 ; CHECKNOFP16-NEXT:    faddp s0, v0.2s
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx)
   ret float %r
 }
 
@@ -265,16 +265,16 @@
 ; CHECKNOFP16-NEXT:    fadd v0.2d, v0.2d, v1.2d
 ; CHECKNOFP16-NEXT:    faddp d0, v0.2d
 ; CHECKNOFP16-NEXT:    ret
-  %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx)
+  %r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx)
   ret double %r
 }
 
 ; Function Attrs: nounwind readnone
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half, <8 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half, <16 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
+declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 514a43a..8d952a4 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
-declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
-declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
+declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
+declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
+declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
+declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
 
-declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
+declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
+declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
+  %b = call half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
   ret half %b
 }
 
@@ -24,7 +24,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
+  %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
+  %b = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
   ret double %b
 }
 
@@ -40,14 +40,14 @@
 ; CHECK-LABEL: test_v1f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
   ret fp128 %b
 }
 
 ; TODO: This doesn't work, because ExpandReductions only supports power of two
 ; unordered reductions.
 ;define float @test_v3f32(<3 x float> %a) nounwind {
-;  %b = call float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
+;  %b = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
 ;  ret float %b
 ;}
 
@@ -55,7 +55,7 @@
 ; CHECK-LABEL: test_v2f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    b fmaxl
-  %b = call fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -67,6 +67,6 @@
 ; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
+  %b = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 89cee4f..f1ebd8f 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
-declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
-declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
+declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
+declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
+declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
+declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
 
-declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
+declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
+declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call nnan half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
+  %b = call nnan half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
   ret half %b
 }
 
@@ -24,7 +24,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
-  %b = call nnan float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
+  %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call nnan double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
+  %b = call nnan double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
   ret double %b
 }
 
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: test_v1f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
+  %b = call nnan fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -52,7 +52,7 @@
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call nnan float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
+  %b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
   ret float %b
 }
 
@@ -64,7 +64,7 @@
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call nnan ninf float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
+  %b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
   ret float %b
 }
 
@@ -72,7 +72,7 @@
 ; CHECK-LABEL: test_v2f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    b fmaxl
-  %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
+  %b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -84,6 +84,6 @@
 ; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
+  %b = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index bb2d6b7..4129fa8 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -1,20 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a)
-declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a)
-declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a)
+declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a)
+declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
+declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
+declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)
 
-declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
-declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a)
+declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
+declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
+declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call nnan half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a)
+  %b = call nnan half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a)
   ret half %b
 }
 
@@ -24,7 +24,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
-  %b = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a)
+  %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
   ret float %b
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call nnan double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a)
+  %b = call nnan double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
   ret double %b
 }
 
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: test_v1f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a)
+  %b = call nnan fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -52,7 +52,7 @@
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fminnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
+  %b = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
   ret float %b
 }
 
@@ -64,7 +64,7 @@
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    fminnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call nnan ninf float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
+  %b = call nnan ninf float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
   ret float %b
 }
 
@@ -72,7 +72,7 @@
 ; CHECK-LABEL: test_v2f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    b fminl
-  %b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
+  %b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -84,6 +84,6 @@
 ; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    fminnmv s0, v0.4s
 ; CHECK-NEXT:    ret
-  %b = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a)
+  %b = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
index 41914ca..be661127 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
@@ -3,14 +3,14 @@
 
 ; Same as vecreduce-fmul-legalization.ll, but without fmf.
 
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
@@ -20,7 +20,7 @@
 ; CHECK-NEXT:    fmul s0, s0, s1
 ; CHECK-NEXT:    fcvt h0, s0
 ; CHECK-NEXT:    ret
-  %b = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -31,7 +31,7 @@
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    fmul s0, s1, v0.s[0]
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float 0.0, <1 x float> %a)
+  %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 0.0, <1 x float> %a)
   ret float %b
 }
 
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    fmov d1, xzr
 ; CHECK-NEXT:    fmul d0, d0, d1
 ; CHECK-NEXT:    ret
-  %b = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double 0.0, <1 x double> %a)
+  %b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 0.0, <1 x double> %a)
   ret double %b
 }
 
@@ -54,7 +54,7 @@
 ; CHECK-NEXT:    bl __multf3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -66,7 +66,7 @@
 ; CHECK-NEXT:    fmul s1, s1, v0.s[1]
 ; CHECK-NEXT:    fmul s0, s1, v0.s[2]
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float 0.0, <3 x float> %a)
+  %b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 0.0, <3 x float> %a)
   ret float %b
 }
 
@@ -84,7 +84,7 @@
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -109,6 +109,6 @@
 ; CHECK-NEXT:    fmul s0, s0, v3.s[2]
 ; CHECK-NEXT:    fmul s0, s0, v3.s[3]
 ; CHECK-NEXT:    ret
-  %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 0.0, <16 x float> %a)
+  %b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 0.0, <16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
index cdb557d..5675fd7 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
@@ -24,8 +24,8 @@
  %1 = insertelement <4 x double> %0, double 1.0, i32 1
  %2 = insertelement <4 x double> %1, double 1.0, i32 2
  %3 = insertelement <4 x double> %2, double 1.0, i32 3
- %4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %3)
+ %4 = call nnan reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> %3)
  ret double %4
 }
 
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 7c94152..811745a 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -1,29 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)
+declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a)
+declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
+declare i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
+declare i24 @llvm.vector.reduce.umax.v1i24(<1 x i24> %a)
+declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
+declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
+declare i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a)
 
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
-declare i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
+declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
+declare i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
+declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
+declare i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
+declare i24 @llvm.vector.reduce.umax.v4i24(<4 x i24> %a)
+declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a)
 
 define i1 @test_v1i1(<1 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v1i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w0, w0, #0x1
 ; CHECK-NEXT:    ret
-  %b = call i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a)
   ret i1 %b
 }
 
@@ -33,7 +33,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
   ret i8 %b
 }
 
@@ -43,7 +43,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
-  %b = call i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
+  %b = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
   ret i16 %b
 }
 
@@ -51,7 +51,7 @@
 ; CHECK-LABEL: test_v1i24:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.umax.v1i24(<1 x i24> %a)
   ret i24 %b
 }
 
@@ -61,7 +61,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
@@ -71,7 +71,7 @@
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
-  %b = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
+  %b = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
   ret i64 %b
 }
 
@@ -79,7 +79,7 @@
 ; CHECK-LABEL: test_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %b = call i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a)
   ret i128 %b
 }
 
@@ -92,7 +92,7 @@
 ; CHECK-NEXT:   cmp     x9, x8
 ; CHECK-NEXT:   csel    x0, x9, x8, hi
 ; CHECK-NEXT:   ret
-  %b = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
+  %b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
   ret i64 %b
 }
 
@@ -107,7 +107,7 @@
 ; CHECK-NEXT:    umaxv h0, v0.4h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
   ret i8 %b
 }
 
@@ -124,7 +124,7 @@
 ; CHECK-NEXT:    umaxv b0, v0.16b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
   ret i8 %b
 }
 
@@ -135,7 +135,7 @@
 ; CHECK-NEXT:    umaxv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
   ret i32 %b
 }
 
@@ -148,7 +148,7 @@
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
-  %b = call i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -159,7 +159,7 @@
 ; CHECK-NEXT:    umaxv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
+  %b = call i24 @llvm.vector.reduce.umax.v4i24(<4 x i24> %a)
   ret i24 %b
 }
 
@@ -173,7 +173,7 @@
 ; CHECK-NEXT:    csel x0, x8, x9, eq
 ; CHECK-NEXT:    csel x1, x1, x3, hi
 ; CHECK-NEXT:    ret
-  %b = call i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
+  %b = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
   ret i128 %b
 }
 
@@ -186,6 +186,6 @@
 ; CHECK-NEXT:    umaxv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
-  %b = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)
+  %b = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a)
   ret i32 %b
 }
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
index aaa376a..9e3d7e7 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
+declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
 
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@
 ; CHECK-NEXT:    bl __aeabi_f2h
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %a)
+  %b = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half 0.0, <4 x half> %a)
   ret half %b
 }
 
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    bl __aeabi_fadd
 ; CHECK-NEXT:    pop {r4, r5, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a)
+  %b = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a)
   ret float %b
 }
 
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    bl __aeabi_dadd
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
+  %b = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
   ret double %b
 }
 
@@ -90,6 +90,6 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fast fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
index 39673bb..83073b8 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
@@ -28,7 +28,7 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI0_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -44,7 +44,7 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI1_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
   ret float %b
 }
 
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:    vadd.f64 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    mov pc, lr
-  %b = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
+  %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
   ret double %b
 }
 
@@ -76,7 +76,7 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -95,7 +95,7 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI4_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
   ret float %b
 }
 
@@ -124,7 +124,7 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r4, r5, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -162,6 +162,6 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI6_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
+  %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
index 586a02b..48968ee 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128>)
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128>)
 
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@
 ; CHECK-NEXT:    bl __aeabi_f2h
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a)
+  %b = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
   ret half %b
 }
 
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    bl fmaxf
 ; CHECK-NEXT:    pop {r4, r5, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a)
+  %b = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
   ret float %b
 }
 
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    bl fmax
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a)
+  %b = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
   ret double %b
 }
 
@@ -90,6 +90,6 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
+  %b = call fast fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
index b64e447..1252085 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128>)
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128>)
 
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@
 ; CHECK-NEXT:    bl __aeabi_f2h
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a)
+  %b = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
   ret half %b
 }
 
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    bl fminf
 ; CHECK-NEXT:    pop {r4, r5, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a)
+  %b = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
   ret float %b
 }
 
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    bl fmin
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a)
+  %b = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
   ret double %b
 }
 
@@ -90,6 +90,6 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
+  %b = call fast fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
index 62111e5..4217eab 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half, <4 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>)
+declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)
 
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@
 ; CHECK-NEXT:    bl __aeabi_f2h
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half 1.0, <4 x half> %a)
+  %b = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %a)
   ret half %b
 }
 
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    bl __aeabi_fmul
 ; CHECK-NEXT:    pop {r4, r5, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a)
+  %b = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
   ret float %b
 }
 
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    bl __aeabi_dmul
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a)
+  %b = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a)
   ret double %b
 }
 
@@ -90,6 +90,6 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fast fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
+  %b = call fast fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
index 003b64be..b3d23ad 100644
--- a/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
+++ b/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK
 
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half, <1 x half>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float, <1 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128, <1 x fp128>)
+declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>)
+declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>)
+declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>)
+declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>)
 
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float, <3 x float>)
-declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>)
+declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)
+declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
 
 define half @test_v1f16(<1 x half> %a) nounwind {
 ; CHECK-LABEL: test_v1f16:
@@ -28,7 +28,7 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI0_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half 0.0, <1 x half> %a)
+  %b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 0.0, <1 x half> %a)
   ret half %b
 }
 
@@ -44,7 +44,7 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI1_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float 0.0, <1 x float> %a)
+  %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 0.0, <1 x float> %a)
   ret float %b
 }
 
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:    vmul.f64 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    mov pc, lr
-  %b = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double 0.0, <1 x double> %a)
+  %b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 0.0, <1 x double> %a)
   ret double %b
 }
 
@@ -76,7 +76,7 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
   ret fp128 %b
 }
 
@@ -95,7 +95,7 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI4_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float 0.0, <3 x float> %a)
+  %b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 0.0, <3 x float> %a)
   ret float %b
 }
 
@@ -124,7 +124,7 @@
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    pop {r4, r5, r11, lr}
 ; CHECK-NEXT:    mov pc, lr
-  %b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+  %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
   ret fp128 %b
 }
 
@@ -162,6 +162,6 @@
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI6_0:
 ; CHECK-NEXT:    .long 0x00000000 @ float 0
-  %b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 0.0, <16 x float> %a)
+  %b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 0.0, <16 x float> %a)
   ret float %b
 }
diff --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
index e0e3149..99aa181 100644
--- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
+++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
@@ -1,24 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -expand-reductions -S | FileCheck %s
 ; Tests without a target which should expand all reductions
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
 
-declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
 
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
 
-declare i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)
 
 define i64 @add_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @add_i64(
@@ -29,7 +29,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -42,7 +42,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -81,7 +81,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -97,7 +97,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
   ret float %r
 }
 
@@ -113,7 +113,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -131,7 +131,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
   ret float %r
 }
 
@@ -149,7 +149,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -165,7 +165,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
   ret float %r
 }
 
@@ -181,7 +181,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -199,7 +199,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
   ret float %r
 }
 
@@ -217,7 +217,7 @@
 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 ;
 entry:
-  %r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
+  %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
   ret float %r
 }
 
@@ -231,7 +231,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -245,7 +245,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -259,7 +259,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -273,7 +273,7 @@
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
+  %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
   ret i64 %r
 }
 
@@ -282,11 +282,11 @@
 define double @fmax_f64(<2 x double> %vec) {
 ; CHECK-LABEL: @fmax_f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
 ; CHECK-NEXT:    ret double [[R]]
 ;
 entry:
-  %r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
+  %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
   ret double %r
 }
 
@@ -295,11 +295,11 @@
 define double @fmin_f64(<2 x double> %vec) {
 ; CHECK-LABEL: @fmin_f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
 ; CHECK-NEXT:    ret double [[R]]
 ;
 entry:
-  %r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
+  %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
   ret double %r
 }
 
@@ -309,10 +309,10 @@
 define i8 @test_v3i8(<3 x i8> %a) nounwind {
 ; CHECK-LABEL: @test_v3i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
 ; CHECK-NEXT:    ret i8 [[B]]
 ;
 entry:
-  %b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
+  %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
   ret i8 %b
 }
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
index 77e35de..5bc82d4 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
@@ -44,7 +44,7 @@
     %add7 = add <4 x i32> %mul, %splat.output
     %max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef)
     %min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef)
-    %reduce = tail call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %min)
+    %reduce = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %min)
     store i32 %reduce, i32* %scevgep2
     %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4
     %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4
@@ -62,7 +62,7 @@
   declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
   declare i1 @llvm.test.set.loop.iterations.i32(i32) #4
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #4
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #5
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #5
 
 ...
 ---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
index 522cce4..29ecf00c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -85,7 +85,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %tmp8 = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %tmp9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp8)
+  %tmp9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp8)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -188,7 +188,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -287,7 +287,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -386,7 +386,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
-  %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
+  %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -528,6 +528,6 @@
 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
 
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
index f27a98c..ab3c866 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
@@ -56,7 +56,7 @@
     br i1 %tmp16, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %tmp17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp14)
+    %tmp17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp14)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -64,7 +64,7 @@
     ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
index 5db355a..b796712 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
@@ -58,7 +58,7 @@
     br i1 %tmp16, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %tmp17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp14)
+    %tmp17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp14)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -66,7 +66,7 @@
     ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
index ab2ffb5..2b35400 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
@@ -68,7 +68,7 @@
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
     %tmp12 = mul nsw <4 x i32> %pass, %tmp10
-    %tmp13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp12)
+    %tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
     %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
     %tmp16 = icmp ne i32 %tmp15, 0
@@ -105,7 +105,7 @@
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
     %tmp12 = add nsw <4 x i32> %pass, %tmp10
-    %tmp13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp12)
+    %tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
     %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
     %tmp16 = icmp ne i32 %tmp15, 0
@@ -117,7 +117,7 @@
     ret i32 %res
   }
 
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
   declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
   declare void @llvm.set.loop.iterations.i32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
index e5131fd..cdf53b8 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
@@ -40,7 +40,7 @@
     br i1 %15, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %16 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
+    %16 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -88,7 +88,7 @@
     br i1 %15, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %16 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
+    %16 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -98,7 +98,7 @@
 
   declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
   declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
-  declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
+  declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
index 886fbe7..f9d1abb 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
@@ -91,7 +91,7 @@
     %22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %10)
     %23 = bitcast i16* %lsr.iv7 to i1*
     %24 = select <4 x i1> %22, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa
-    %25 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %24)
+    %25 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %24)
     %sunkaddr = mul i32 %i.064.us, 4
     %26 = bitcast i32* %e to i8*
     %sunkaddr17 = getelementptr inbounds i8, i8* %26, i32 %sunkaddr
@@ -141,7 +141,7 @@
   }
   declare dso_local arm_aapcs_vfpcc signext i16 @crc16(...) local_unnamed_addr #0
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 2b90065..5c3af35 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -69,7 +69,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -145,7 +145,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -221,7 +221,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -297,7 +297,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -371,7 +371,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %6 = select <4 x i1> %1, <4 x i32> %4, <4 x i32> %vec.phi
-  %7 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %6)
+  %7 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %6)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -1273,6 +1273,6 @@
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
index e5bcf2e..c797e04 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
@@ -51,7 +51,7 @@
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP14]], <4 x i32> [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
 ; CHECK-NEXT:    store i32 [[TMP18]], i32* [[ARRAYIDX8_US]], align 4
 ; CHECK-NEXT:    [[INC10_US]] = add nuw i32 [[I_025_US]], 1
 ; CHECK-NEXT:    [[EXITCOND27:%.*]] = icmp eq i32 [[INC10_US]], [[N]]
@@ -112,7 +112,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %tmp17 = select <4 x i1> %tmp7, <4 x i32> %tmp14, <4 x i32> %vec.phi
-  %tmp18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp17)
+  %tmp18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp17)
   store i32 %tmp18, i32* %arrayidx8.us, align 4
   %inc10.us = add nuw i32 %i.025.us, 1
   %exitcond27 = icmp eq i32 %inc10.us, %N
@@ -170,7 +170,7 @@
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP12]], <4 x i32> [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
+; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
 ; CHECK-NEXT:    store i32 [[TMP16]], i32* [[ARRAYIDX7_US]], align 4
 ; CHECK-NEXT:    [[INC9_US]] = add nuw i32 [[I_024_US]], 1
 ; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i32 [[INC9_US]], [[N]]
@@ -229,7 +229,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %tmp15 = select <4 x i1> %tmp7, <4 x i32> %tmp12, <4 x i32> %vec.phi
-  %tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp15)
+  %tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp15)
   store i32 %tmp16, i32* %arrayidx7.us, align 4
   %inc9.us = add nuw i32 %i.024.us, 1
   %exitcond26 = icmp eq i32 %inc9.us, %N
@@ -247,7 +247,7 @@
 declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #0
 
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #1
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1
 
 ; Function Attrs: noduplicate nounwind
 declare void @llvm.set.loop.iterations.i32(i32) #2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
index 9eb95d7..4a5f483 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
@@ -40,7 +40,7 @@
     br i1 %tmp15, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp13)
+    %tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp13)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -48,7 +48,7 @@
     ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
index 65f9cc3..c27a6c3 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
@@ -44,7 +44,7 @@
     %.lcssa = phi <16 x i8> [ %13, %vector.body ]
     %16 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7)
     %17 = select <16 x i1> %16, <16 x i8> %.lcssa, <16 x i8> %vec.phi.lcssa
-    %18 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %17)
+    %18 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %17)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -53,7 +53,7 @@
   }
 
   declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
-  declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) #2
+  declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
index 966bdc9..3a098f2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir
@@ -36,7 +36,7 @@
     br i1 %cmp, label %for.body, label %middle.block
 
   middle.block:                                     ; preds = %for.body
-    %reduce = tail call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %acc.next)
+    %reduce = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %acc.next)
     ret i16 %reduce
 
   for.cond.cleanup:                                 ; preds = %entry
@@ -47,7 +47,7 @@
   declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) #2
   declare i1 @llvm.test.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) #4
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #4
   declare <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1
 
 ...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
index f013cb2..c3655ba 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir
@@ -41,7 +41,7 @@
     br i1 %16, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+    %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -88,7 +88,7 @@
     br i1 %16, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+    %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -135,7 +135,7 @@
     br i1 %16, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+    %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -182,7 +182,7 @@
     br i1 %16, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
+    %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -228,7 +228,7 @@
     br i1 %14, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %15 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
+    %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -274,7 +274,7 @@
     br i1 %14, label %vector.body, label %middle.block
 
   middle.block:                                     ; preds = %vector.body
-    %15 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
+    %15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -285,7 +285,7 @@
   declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
   declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index a0cdb82..f911663 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -45,7 +45,7 @@
   %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
   %i4 = add <16 x i8> %wide.masked.load, %wide.masked.load16
   %i5 = select <16 x i1> %active.lane.mask, <16 x i8> %i4, <16 x i8> %vec.phi
-  %i6 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %i5)
+  %i6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i5)
   %index.next = add i32 %index, 16
   %i7 = icmp eq i32 %index.next, %n.vec
   br i1 %i7, label %middle.block, label %vector.body
@@ -123,7 +123,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i9 = select <8 x i1> %active.lane.mask, <8 x i16> %i7, <8 x i16> %vec.phi
-  %i10 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i9)
+  %i10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i9)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -193,7 +193,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i7 = select <16 x i1> %active.lane.mask, <16 x i8> %i5, <16 x i8> %vec.phi
-  %i8 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %i7)
+  %i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i7)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -265,7 +265,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i9 = select <8 x i1> %active.lane.mask, <8 x i16> %i7, <8 x i16> %vec.phi
-  %i10 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i9)
+  %i10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i9)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -335,7 +335,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i7 = select <16 x i1> %active.lane.mask, <16 x i8> %i5, <16 x i8> %vec.phi
-  %i8 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %i7)
+  %i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i7)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -407,7 +407,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i9 = select <8 x i1> %active.lane.mask, <8 x i16> %i7, <8 x i16> %vec.phi
-  %i10 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i9)
+  %i10 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i9)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -504,7 +504,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i9 = select <4 x i1> %active.lane.mask, <4 x i32> %i7, <4 x i32> %vec.phi
-  %i10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %i9)
+  %i10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %i9)
   br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph47
 
 vector.ph47:                                      ; preds = %middle.block
@@ -534,7 +534,7 @@
 
 middle.block44:                                   ; preds = %vector.body46
   %i21 = select <4 x i1> %active.lane.mask61, <4 x i32> %i19, <4 x i32> %vec.phi60
-  %i22 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %i21)
+  %i22 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %i21)
   br label %for.cond.cleanup7
 
 for.cond.cleanup7:                                ; preds = %middle.block44, %middle.block, %entry
@@ -620,9 +620,9 @@
 
 middle.block:                                     ; preds = %vector.body
   %i11 = select <8 x i1> %active.lane.mask, <8 x i16> %i8, <8 x i16> %vec.phi
-  %i12 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i11)
+  %i12 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i11)
   %i13 = select <8 x i1> %active.lane.mask, <8 x i16> %i9, <8 x i16> %vec.phi.1
-  %i14 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %i13)
+  %i14 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %i13)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -747,7 +747,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %10 = select <4 x i1> %active.lane.mask, <4 x i32> %8, <4 x i32> %vec.phi
-  %11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %10)
+  %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %10)
   br label %for.end
 
 for.end:                                          ; preds = %middle.block, %lor.end
@@ -758,10 +758,10 @@
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
 declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
 declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
index 17acf67a..497f041 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
@@ -46,7 +46,7 @@
     %.lcssa = phi <4 x i32> [ %15, %vector.body ], !dbg !38
     %18 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %9), !dbg !34
     %19 = select <4 x i1> %18, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa, !dbg !38
-    %20 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %19), !dbg !32
+    %20 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %19), !dbg !32
     br label %for.cond.cleanup, !dbg !42
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -58,7 +58,7 @@
 
   declare void @llvm.dbg.value(metadata, metadata, metadata)
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
index 338c980..d786209 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
@@ -258,7 +258,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %19 = select <4 x i1> %active.lane.mask, <4 x i32> %16, <4 x i32> %vec.phi
-  %20 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %19)
+  %20 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %19)
   br label %for.end
 
 for.end:                                          ; preds = %middle.block, %for.body
@@ -282,6 +282,6 @@
 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
 declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
index 26be532..6d14058 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll
@@ -74,14 +74,14 @@
   br i1 %8, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %9 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %7)
-  %10 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %5)
+  %9 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %7)
+  %10 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %5)
   store i32 %10, i32* %minp, align 4
   ret i32 %9
 }
 
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>) #3
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>) #3
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) #3
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) #3
 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
index 1f212c9..dec5400 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
@@ -26,7 +26,7 @@
     %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7)
     %tmp9 = sub i32 %tmp7, 8
     %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
-    %min = tail call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %wide.masked.load)
+    %min = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %wide.masked.load)
     store i16 %min, i16* %lsr.iv.2
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
     %scevgep.2 = getelementptr i16, i16* %lsr.iv.2, i32 1
@@ -43,7 +43,7 @@
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <8 x i1> @llvm.arm.mve.vctp16(i32)
-  declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+  declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
 
 ...
 ---
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
index cd8310c..1d9f7d7 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
@@ -26,7 +26,7 @@
     %tmp9 = sub i32 %tmp7, 4
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp10)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10)
     store i32 %tmp11, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
@@ -64,7 +64,7 @@
     %tmp9 = sub i32 %tmp7, 8
     %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
     %sext = sext <8 x i16> %wide.masked.load to <8 x i32>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %sext)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %sext)
     store i32 %tmp11, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
@@ -102,7 +102,7 @@
     %tmp9 = sub i32 %tmp7, 16
     %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
     %sext = sext <16 x i8> %wide.masked.load to <16 x i32>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %sext)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %sext)
     store i32 %tmp11, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
     %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
@@ -140,7 +140,7 @@
     %tmp9 = sub i32 %tmp7, 4
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp10)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10)
     %acc.next = add i32 %tmp11, %acc
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
     %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
@@ -179,7 +179,7 @@
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
     %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
     store i32 %tmp11, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
@@ -218,7 +218,7 @@
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
     %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
     %acc.next = add i32 %tmp11, %acc
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
     %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
@@ -257,7 +257,7 @@
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32>
     %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
     store i32 %tmp11, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
@@ -296,7 +296,7 @@
     %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
     %tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32>
     %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1>
-    %tmp11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %not)
+    %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not)
     %acc.next = add i32 %tmp11, %acc
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 4
     %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
@@ -335,7 +335,7 @@
     %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
     %sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16>
     %sub = sub <8 x i16> %sext.wide, %pass
-    %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+    %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
     %sext.reduce = sext i16 %reduce to i32
     store i32 %sext.reduce, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -375,7 +375,7 @@
     %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef)
     %sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16>
     %sub = sub <8 x i16> %sext.wide, %pass
-    %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+    %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
     %sext.reduce = sext i16 %reduce to i32
     %acc.next = add i32 %sext.reduce, %acc
     %scevgep = getelementptr i8, i8* %lsr.iv, i32 8
@@ -414,7 +414,7 @@
     %tmp9 = sub i32 %tmp7, 8
     %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
     %sub = sub <8 x i16> %wide.masked.load, %pass
-    %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+    %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
     %zext.reduce = zext i16 %reduce to i32
     store i32 %zext.reduce, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -453,7 +453,7 @@
     %tmp9 = sub i32 %tmp7, 8
     %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef)
     %sub = sub <8 x i16> %wide.masked.load, %pass
-    %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %sub)
+    %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub)
     %zext.reduce = zext i16 %reduce to i32
     %acc.next = add i32 %zext.reduce, %acc
     %scevgep = getelementptr i16, i16* %lsr.iv, i32 8
@@ -492,7 +492,7 @@
     %tmp9 = sub i32 %tmp7, 16
     %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
     %xor = xor <16 x i8> %wide.masked.load, %pass
-    %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+    %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
     %sext.reduce = sext i8 %reduce to i32
     store i32 %sext.reduce, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -531,7 +531,7 @@
     %tmp9 = sub i32 %tmp7, 16
     %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
     %xor = xor <16 x i8> %wide.masked.load, %pass
-    %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+    %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
     %sext.reduce = sext i8 %reduce to i32
     %acc.next = add i32 %sext.reduce, %acc
     %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
@@ -570,7 +570,7 @@
     %tmp9 = sub i32 %tmp7, 16
     %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
     %xor = xor <16 x i8> %wide.masked.load, %pass
-    %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+    %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
     %zext.reduce = zext i8 %reduce to i32
     store i32 %zext.reduce, i32* %store.addr
     %store.next = getelementptr i32, i32* %store.addr, i32 1
@@ -609,7 +609,7 @@
     %tmp9 = sub i32 %tmp7, 16
     %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef)
     %xor = xor <16 x i8> %wide.masked.load, %pass
-    %reduce = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %xor)
+    %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor)
     %zext.reduce = zext i8 %reduce to i32
     %acc.next = add i32 %zext.reduce, %acc
     %scevgep = getelementptr i8, i8* %lsr.iv, i32 16
@@ -652,7 +652,7 @@
     %tmp4 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp3, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer)
     %zext.wide.2 = zext <4 x i16> %tmp4 to <4 x i32>
     %or = or <4 x i32> %zext.wide.1, %zext.wide.2
-    %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %or)
+    %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %or)
     %acc.next = add i32 %reduce, %acc
     %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 4
     %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 4
@@ -693,7 +693,7 @@
     %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
     %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
     %or = or <8 x i16> %tmp2, %tmp4
-    %reduce = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %or)
+    %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %or)
     %zext.reduce = zext i16 %reduce to i32
     %acc.next = add i32 %zext.reduce, %acc
     %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
@@ -737,7 +737,7 @@
     %tmp5 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1)
     %tmp6 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 0)
     %mul = add <4 x i32> %tmp5, %tmp6
-    %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %mul)
+    %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul)
     %acc.next = add i32 %reduce, %acc
     %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
     %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
@@ -778,7 +778,7 @@
     %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
     %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer)
     %mul = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1)
-    %reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %mul)
+    %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul)
     %acc.next = add i32 %reduce, %acc
     %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8
     %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8
@@ -798,11 +798,11 @@
   declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
   declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
   declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-  declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-  declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-  declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+  declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+  declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
index 15aed3b..7aa772c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
@@ -214,7 +214,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %i19 = select <4 x i1> %active.lane.mask, <4 x i32> %i16, <4 x i32> %vec.phi
-  %i20 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %i19)
+  %i20 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %i19)
   br label %for.end
 
 for.end:                                          ; preds = %middle.block, %for.body
@@ -235,6 +235,6 @@
 
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
 declare void @llvm.set.loop.iterations.i32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
index 4f80869..4308c7e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir
@@ -47,7 +47,7 @@
     %15 = add i32 %8, 4
     %16 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %15)
     %17 = select <4 x i1> %16, <4 x i32> %12, <4 x i32> %vec.phi
-    %18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %17)
+    %18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %17)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -55,7 +55,7 @@
     ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <4 x i1> @llvm.arm.mve.vctp32(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
index a42c33e..9799ceb 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir
@@ -46,7 +46,7 @@
     %.lcssa = phi <8 x i16> [ %15, %vector.body ]
     %18 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %7)
     %19 = select <8 x i1> %18, <8 x i16> %.lcssa, <8 x i16> %vec.phi.lcssa
-    %20 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %19)
+    %20 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %19)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -54,7 +54,7 @@
     ret i16 %a.0.lcssa
   }
   declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <8 x i1> @llvm.arm.mve.vctp16(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
index 6628df2..422fc3c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
@@ -70,7 +70,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %8 = select <4 x i1> %1, <4 x i32> %6, <4 x i32> %vec.phi
-  %9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %8)
+  %9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %8)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -141,7 +141,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %5 = select <4 x i1> %1, <4 x i32> %3, <4 x i32> %vec.phi
-  %6 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %5)
+  %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -212,7 +212,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %5 = select <4 x i1> %1, <4 x i32> %3, <4 x i32> %vec.phi
-  %6 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %5)
+  %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -459,7 +459,7 @@
 declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
index 64e7552..c05ed7d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
@@ -16,7 +16,7 @@
 
 ; CHECK: middle.block:
 ; CHECK: [[VPSEL:%[^ ]+]] = select <4 x i1> [[VCTP]],
-; CHECK: call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]])
+; CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]])
 
 define i32 @vec_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
 entry:
@@ -64,7 +64,7 @@
 
 middle.block:                                     ; preds = %vector.body
   %12 = select <4 x i1> %7, <4 x i32> %9, <4 x i32> %vec.phi
-  %13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
+  %13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
   br label %for.cond.cleanup
 
 for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -73,7 +73,7 @@
 }
 
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare void @llvm.set.loop.iterations.i32(i32)
 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
index 23cdf73..07c136e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir
@@ -118,7 +118,7 @@
   middle.block:                                     ; preds = %vector.body
     %8 = call <4 x i1> @llvm.arm.vctp32(i32 %5)
     %tmp8 = select <4 x i1> %8, <4 x i32> %tmp6, <4 x i32> %vec.phi
-    %tmp9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp8)
+    %tmp9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp8)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -134,7 +134,7 @@
   declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
   declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
   declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
   declare <16 x i1> @llvm.arm.vctp8(i32)
   declare void @llvm.stackprotector(i8*, i8**)
   declare <8 x i1> @llvm.arm.vctp16(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
index fc0aa20..fa7304e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir
@@ -46,7 +46,7 @@
     %.lcssa = phi <8 x i16> [ %15, %vector.body ]
     %18 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %7)
     %19 = select <8 x i1> %18, <8 x i16> %.lcssa, <8 x i16> %vec.phi.lcssa
-    %20 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %19)
+    %20 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %19)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -54,7 +54,7 @@
     ret i16 %a.0.lcssa
   }
   declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
-  declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+  declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
   declare void @llvm.set.loop.iterations.i32(i32)
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
   declare <8 x i1> @llvm.arm.mve.vctp16(i32)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
index d91556e3..7ef303a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir
@@ -52,7 +52,7 @@
     %n.splat = shufflevector <4 x i32> %insert.n, <4 x i32> undef, <4 x i32> zeroinitializer
     %tmp16 = icmp ult <4 x i32> %idx.splat, %n.splat
     %tmp17 = select <4 x i1> %tmp16, <4 x i32> %tmp13, <4 x i32> %vec.phi
-    %tmp18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp17)
+    %tmp18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp17)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -60,7 +60,7 @@
     ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
index 3378161..00abf16 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir
@@ -45,7 +45,7 @@
   middle.block:                                     ; preds = %vector.body
     %15 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8)
     %16 = select <4 x i1> %15, <4 x i32> %12, <4 x i32> %vec.phi
-    %17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %16)
+    %17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %16)
     br label %for.cond.cleanup
 
   for.cond.cleanup:                                 ; preds = %middle.block, %entry
@@ -53,7 +53,7 @@
     ret i32 %res.0.lcssa
   }
   declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
-  declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
+  declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
   declare void @llvm.set.loop.iterations.i32(i32) #3
   declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
   declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
index d158c85..e06ec42 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
@@ -572,7 +572,7 @@
   br i1 %10, label %middle.block, label %vector.body, !llvm.loop !7
 
 middle.block:                                     ; preds = %vector.body
-  %11 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9)
+  %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9)
 ;for.cond8.for.cond.cleanup10_crit_edge.us.us:     ; preds = %for.body11.us.us, %middle.block
   %add19.us.us = add i32 %j.051.us.us, %mul18.us
   %arrayidx20.us.us = getelementptr inbounds i32, i32* %C, i32 %add19.us.us
@@ -803,7 +803,7 @@
   br i1 %12, label %middle.block, label %vector.body, !llvm.loop !7
 
 middle.block:                                     ; preds = %vector.body
-  %13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %11)
+  %13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %11)
   br i1 %cmp.n, label %for.cond5.for.cond.cleanup7_crit_edge.us.us, label %for.body8.us.us.preheader
 
 for.cond5.for.cond.cleanup7_crit_edge.us.us:      ; preds = %for.body8.us.us, %middle.block
@@ -1065,7 +1065,7 @@
   %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp85, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
   %tmp86 = sext <4 x i8> %wide.masked.gather75 to <4 x i32>
   %tmp87 = mul nsw <4 x i32> %tmp84, %tmp86
-  %tmp88 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp87)
+  %tmp88 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp87)
   %tmp89 = add i32 %tmp88, %vec.phi
   %index.next = add i32 %index, 4
   %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
@@ -1091,7 +1091,7 @@
 declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) #3
 
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare void @llvm.memset.p0i8.i32(i8* align 2, i8, i32, i1)
 
 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
index a4a6751..5c32f37 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll
@@ -62,7 +62,7 @@
   br i1 %8, label %middle.block, label %vector.body
 middle.block:                                     ; preds = %vector.body
   %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi
-  %10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9)
+  %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9)
   store i32 %10, i32* %arrayidx.us.us, align 4
   %inc21.us.us = add nuw i32 4, 1
   %exitcond81.not = icmp eq i32 %inc21.us.us, %n
@@ -139,7 +139,7 @@
   br i1 %8, label %middle.block, label %vector.body
 middle.block:                                     ; preds = %vector.body
   %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi
-  %10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9)
+  %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9)
   store i32 %10, i32* %arrayidx.us.us, align 4
   %inc21.us.us = add nuw i32 4, 1
   %exitcond81.not = icmp eq i32 %inc21.us.us, %n
@@ -210,7 +210,7 @@
   br i1 %8, label %middle.block, label %vector.body
 middle.block:                                     ; preds = %vector.body
   %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi
-  %10 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %9)
+  %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9)
   store i32 %10, i32* %arrayidx.us.us, align 4
   %inc21.us.us = add nuw i32 4, 1
   %exitcond81.not = icmp eq i32 %inc21.us.us, %n
@@ -440,7 +440,7 @@
   ret void
 }
 
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
index d67ccd9..b710912 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
@@ -1390,7 +1390,7 @@
 declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
 declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
 declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i8(<16 x i32> %ext4)
+declare i32 @llvm.vector.reduce.add.v16i8(<16 x i32> %ext4)
 declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>)
 declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>)
 declare i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32, i32, i32, i32, <8 x i16>, <8 x i16>, <8 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vaddv.ll b/llvm/test/CodeGen/Thumb2/mve-vaddv.ll
index e3f236b..d4a0456 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vaddv.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vaddv.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
 
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
+declare i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64>)
+declare i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32>)
+declare i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16>)
+declare i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8>)
 
 define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) {
 ; CHECK-LABEL: vaddv_v2i64_i64:
@@ -20,7 +20,7 @@
 ; CHECK-NEXT:    adcs r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
+  %r = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
   ret i64 %r
 }
 
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:    vaddv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
   ret i32 %r
 }
 
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    vaddv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32> %s1)
   ret i32 %r
 }
 
@@ -51,7 +51,7 @@
 ; CHECK-NEXT:    vaddv.u16 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
   ret i16 %r
 }
 
@@ -62,7 +62,7 @@
 ; CHECK-NEXT:    vaddv.u16 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16> %s1)
   ret i16 %r
 }
 
@@ -72,7 +72,7 @@
 ; CHECK-NEXT:    vaddv.u8 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
   ret i8 %r
 }
 
@@ -83,7 +83,7 @@
 ; CHECK-NEXT:    vaddv.u8 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8> %s1)
   ret i8 %r
 }
 
@@ -102,7 +102,7 @@
 ; CHECK-NEXT:    adcs r1, r3
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
-  %t = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
+  %t = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
   %r = add i64 %t, %x
   ret i64 %r
 }
@@ -113,7 +113,7 @@
 ; CHECK-NEXT:    vaddva.u32 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %t = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
+  %t = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %s1)
   %r = add i32 %t, %x
   ret i32 %r
 }
@@ -125,7 +125,7 @@
 ; CHECK-NEXT:    vaddva.u32 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %t = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %s1)
+  %t = call i32 @llvm.vector.reduce.add.i32.v8i32(<8 x i32> %s1)
   %r = add i32 %t, %x
   ret i32 %r
 }
@@ -136,7 +136,7 @@
 ; CHECK-NEXT:    vaddva.u16 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %t = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
+  %t = call i16 @llvm.vector.reduce.add.i16.v8i16(<8 x i16> %s1)
   %r = add i16 %t, %x
   ret i16 %r
 }
@@ -148,7 +148,7 @@
 ; CHECK-NEXT:    vaddva.u16 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %t = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %s1)
+  %t = call i16 @llvm.vector.reduce.add.i16.v16i16(<16 x i16> %s1)
   %r = add i16 %t, %x
   ret i16 %r
 }
@@ -159,7 +159,7 @@
 ; CHECK-NEXT:    vaddva.u8 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %t = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
+  %t = call i8 @llvm.vector.reduce.add.i8.v16i8(<16 x i8> %s1)
   %r = add i8 %t, %x
   ret i8 %r
 }
@@ -171,7 +171,7 @@
 ; CHECK-NEXT:    vaddva.u8 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %t = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %s1)
+  %t = call i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8> %s1)
   %r = add i8 %t, %x
   ret i8 %r
 }
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
index 35eecab..df2cb43 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT:    vaddv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
   ret i32 %z
 }
 
@@ -18,7 +18,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <4 x i32> %x to <4 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
   ret i64 %z
 }
 
@@ -29,7 +29,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <4 x i32> %x to <4 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
   ret i64 %z
 }
 
@@ -47,7 +47,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <2 x i32> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   ret i64 %z
 }
 
@@ -65,7 +65,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <2 x i32> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   ret i64 %z
 }
 
@@ -76,7 +76,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <8 x i16> %x to <8 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
   ret i32 %z
 }
 
@@ -87,7 +87,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <8 x i16> %x to <8 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
   ret i32 %z
 }
 
@@ -99,7 +99,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <4 x i16> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   ret i32 %z
 }
 
@@ -111,7 +111,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <4 x i16> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   ret i32 %z
 }
 
@@ -122,7 +122,7 @@
 ; CHECK-NEXT:    uxth r0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
   ret i16 %z
 }
 
@@ -175,7 +175,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <8 x i16> %x to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
   ret i64 %z
 }
 
@@ -242,7 +242,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <8 x i16> %x to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
   ret i64 %z
 }
 
@@ -258,7 +258,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <2 x i16> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   ret i64 %z
 }
 
@@ -278,7 +278,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <2 x i16> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   ret i64 %z
 }
 
@@ -289,7 +289,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <16 x i8> %x to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
   ret i32 %z
 }
 
@@ -300,7 +300,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <16 x i8> %x to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
   ret i32 %z
 }
 
@@ -313,7 +313,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <4 x i8> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   ret i32 %z
 }
 
@@ -326,7 +326,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <4 x i8> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   ret i32 %z
 }
 
@@ -338,7 +338,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <16 x i8> %x to <16 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
   ret i16 %z
 }
 
@@ -350,7 +350,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <16 x i8> %x to <16 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
   ret i16 %z
 }
 
@@ -363,7 +363,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <8 x i8> %x to <8 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
   ret i16 %z
 }
 
@@ -376,7 +376,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <8 x i8> %x to <8 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
   ret i16 %z
 }
 
@@ -387,7 +387,7 @@
 ; CHECK-NEXT:    uxtb r0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
   ret i8 %z
 }
 
@@ -492,7 +492,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <16 x i8> %x to <16 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
   ret i64 %z
 }
 
@@ -627,7 +627,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <16 x i8> %x to <16 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
   ret i64 %z
 }
 
@@ -643,7 +643,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <2 x i8> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   ret i64 %z
 }
 
@@ -663,7 +663,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <2 x i8> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   ret i64 %z
 }
 
@@ -678,7 +678,7 @@
 ; CHECK-NEXT:    adcs r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
   ret i64 %z
 }
 
@@ -688,7 +688,7 @@
 ; CHECK-NEXT:    vaddva.u32 r0, q0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -700,7 +700,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <4 x i32> %x to <4 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -712,7 +712,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <4 x i32> %x to <4 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -735,7 +735,7 @@
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %xx = zext <2 x i32> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -756,7 +756,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <2 x i32> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -768,7 +768,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <8 x i16> %x to <8 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -780,7 +780,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <8 x i16> %x to <8 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -793,7 +793,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <4 x i16> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -806,7 +806,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <4 x i16> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -818,7 +818,7 @@
 ; CHECK-NEXT:    uxth r0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -876,7 +876,7 @@
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %xx = zext <8 x i16> %x to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -948,7 +948,7 @@
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %xx = sext <8 x i16> %x to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -967,7 +967,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <2 x i16> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -990,7 +990,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <2 x i16> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1002,7 +1002,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <16 x i8> %x to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1014,7 +1014,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <16 x i8> %x to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1028,7 +1028,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <4 x i8> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1042,7 +1042,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <4 x i8> %x to <4 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1055,7 +1055,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <16 x i8> %x to <16 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1068,7 +1068,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <16 x i8> %x to <16 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1082,7 +1082,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <8 x i8> %x to <8 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1096,7 +1096,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <8 x i8> %x to <8 x i16>
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1108,7 +1108,7 @@
 ; CHECK-NEXT:    uxtb r0, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
   %r = add i8 %z, %a
   ret i8 %r
 }
@@ -1218,7 +1218,7 @@
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %xx = zext <16 x i8> %x to <16 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1358,7 +1358,7 @@
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %xx = sext <16 x i8> %x to <16 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1377,7 +1377,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = zext <2 x i8> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1400,7 +1400,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %xx = sext <2 x i8> %x to <2 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1420,18 +1420,18 @@
 ; CHECK-NEXT:    adcs r1, r3
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
   %r = add i64 %z, %a
   ret i64 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
index 0f3aacf..e59fb0b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
@@ -10,7 +10,7 @@
 entry:
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -24,7 +24,7 @@
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %xx = zext <4 x i32> %x to <4 x i64>
   %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   ret i64 %z
 }
 
@@ -38,7 +38,7 @@
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %xx = sext <4 x i32> %x to <4 x i64>
   %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   ret i64 %z
 }
 
@@ -73,7 +73,7 @@
   %c = icmp eq <2 x i32> %b, zeroinitializer
   %xx = zext <2 x i32> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -114,7 +114,7 @@
   %c = icmp eq <2 x i32> %b, zeroinitializer
   %xx = sext <2 x i32> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -128,7 +128,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = zext <8 x i16> %x to <8 x i32>
   %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   ret i32 %z
 }
 
@@ -142,7 +142,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = sext <8 x i16> %x to <8 x i32>
   %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   ret i32 %z
 }
 
@@ -158,7 +158,7 @@
   %c = icmp eq <4 x i16> %b, zeroinitializer
   %xx = zext <4 x i16> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -174,7 +174,7 @@
   %c = icmp eq <4 x i16> %b, zeroinitializer
   %xx = sext <4 x i16> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -188,7 +188,7 @@
 entry:
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   ret i16 %z
 }
 
@@ -314,7 +314,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = zext <8 x i16> %x to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -456,7 +456,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = sext <8 x i16> %x to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -492,7 +492,7 @@
   %c = icmp eq <2 x i16> %b, zeroinitializer
   %xx = zext <2 x i16> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -537,7 +537,7 @@
   %c = icmp eq <2 x i16> %b, zeroinitializer
   %xx = sext <2 x i16> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -551,7 +551,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = zext <16 x i8> %x to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -565,7 +565,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = sext <16 x i8> %x to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -582,7 +582,7 @@
   %c = icmp eq <4 x i8> %b, zeroinitializer
   %xx = zext <4 x i8> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -600,7 +600,7 @@
   %c = icmp eq <4 x i8> %b, zeroinitializer
   %xx = sext <4 x i8> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -615,7 +615,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = zext <16 x i8> %x to <16 x i16>
   %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   ret i16 %z
 }
 
@@ -630,7 +630,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = sext <16 x i8> %x to <16 x i16>
   %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   ret i16 %z
 }
 
@@ -647,7 +647,7 @@
   %c = icmp eq <8 x i8> %b, zeroinitializer
   %xx = zext <8 x i8> %x to <8 x i16>
   %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   ret i16 %z
 }
 
@@ -664,7 +664,7 @@
   %c = icmp eq <8 x i8> %b, zeroinitializer
   %xx = sext <8 x i8> %x to <8 x i16>
   %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   ret i16 %z
 }
 
@@ -678,7 +678,7 @@
 entry:
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
   ret i8 %z
 }
 
@@ -948,7 +948,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = zext <16 x i8> %x to <16 x i64>
   %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   ret i64 %z
 }
 
@@ -1257,7 +1257,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = sext <16 x i8> %x to <16 x i64>
   %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   ret i64 %z
 }
 
@@ -1293,7 +1293,7 @@
   %c = icmp eq <2 x i8> %b, zeroinitializer
   %xx = zext <2 x i8> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -1338,7 +1338,7 @@
   %c = icmp eq <2 x i8> %b, zeroinitializer
   %xx = sext <2 x i8> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -1372,7 +1372,7 @@
 entry:
   %c = icmp eq <2 x i64> %b, zeroinitializer
   %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -1385,7 +1385,7 @@
 entry:
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1400,7 +1400,7 @@
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %xx = zext <4 x i32> %x to <4 x i64>
   %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1415,7 +1415,7 @@
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %xx = sext <4 x i32> %x to <4 x i64>
   %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1455,7 +1455,7 @@
   %c = icmp eq <2 x i32> %b, zeroinitializer
   %xx = zext <2 x i32> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1501,7 +1501,7 @@
   %c = icmp eq <2 x i32> %b, zeroinitializer
   %xx = sext <2 x i32> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1516,7 +1516,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = zext <8 x i16> %x to <8 x i32>
   %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1531,7 +1531,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = sext <8 x i16> %x to <8 x i32>
   %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1548,7 +1548,7 @@
   %c = icmp eq <4 x i16> %b, zeroinitializer
   %xx = zext <4 x i16> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1565,7 +1565,7 @@
   %c = icmp eq <4 x i16> %b, zeroinitializer
   %xx = sext <4 x i16> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1580,7 +1580,7 @@
 entry:
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1711,7 +1711,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = zext <8 x i16> %x to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1858,7 +1858,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %xx = sext <8 x i16> %x to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1897,7 +1897,7 @@
   %c = icmp eq <2 x i16> %b, zeroinitializer
   %xx = zext <2 x i16> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1947,7 +1947,7 @@
   %c = icmp eq <2 x i16> %b, zeroinitializer
   %xx = sext <2 x i16> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1962,7 +1962,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = zext <16 x i8> %x to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1977,7 +1977,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = sext <16 x i8> %x to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1995,7 +1995,7 @@
   %c = icmp eq <4 x i8> %b, zeroinitializer
   %xx = zext <4 x i8> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -2014,7 +2014,7 @@
   %c = icmp eq <4 x i8> %b, zeroinitializer
   %xx = sext <4 x i8> %x to <4 x i32>
   %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -2030,7 +2030,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = zext <16 x i8> %x to <16 x i16>
   %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2046,7 +2046,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = sext <16 x i8> %x to <16 x i16>
   %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2064,7 +2064,7 @@
   %c = icmp eq <8 x i8> %b, zeroinitializer
   %xx = zext <8 x i8> %x to <8 x i16>
   %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2082,7 +2082,7 @@
   %c = icmp eq <8 x i8> %b, zeroinitializer
   %xx = sext <8 x i8> %x to <8 x i16>
   %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2097,7 +2097,7 @@
 entry:
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
   %r = add i8 %z, %a
   ret i8 %r
 }
@@ -2372,7 +2372,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = zext <16 x i8> %x to <16 x i64>
   %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -2686,7 +2686,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %xx = sext <16 x i8> %x to <16 x i64>
   %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -2725,7 +2725,7 @@
   %c = icmp eq <2 x i8> %b, zeroinitializer
   %xx = zext <2 x i8> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -2775,7 +2775,7 @@
   %c = icmp eq <2 x i8> %b, zeroinitializer
   %xx = sext <2 x i8> %x to <2 x i64>
   %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -2814,18 +2814,18 @@
 entry:
   %c = icmp eq <2 x i64> %b, zeroinitializer
   %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll
index fc06181..cf9c2b6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
   ret i32 %z
 }
 
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
   ret i32 %z
 }
 
@@ -42,7 +42,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
   ret i32 %z
 }
 
@@ -58,7 +58,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
   ret i16 %z
 }
 
@@ -76,7 +76,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
   ret i16 %z
 }
 
@@ -95,7 +95,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
   ret i16 %z
 }
 
@@ -113,7 +113,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
   ret i8 %z
 }
 
@@ -133,7 +133,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
   ret i8 %z
 }
 
@@ -154,7 +154,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
   ret i8 %z
 }
 
@@ -163,7 +163,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
   ret i64 %z
 }
 
@@ -178,7 +178,7 @@
 ; CHECK-NEXT:    ands r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
   ret i64 %z
 }
 
@@ -194,7 +194,7 @@
 ; CHECK-NEXT:    ands r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
   ret i64 %z
 }
 
@@ -207,7 +207,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
   %r = and i32 %y, %z
   ret i32 %r
 }
@@ -225,7 +225,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
   %r = and i32 %y, %z
   ret i32 %r
 }
@@ -244,7 +244,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
   %r = and i32 %y, %z
   ret i32 %r
 }
@@ -262,7 +262,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
   %r = and i16 %y, %z
   ret i16 %r
 }
@@ -282,7 +282,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
   %r = and i16 %y, %z
   ret i16 %r
 }
@@ -303,7 +303,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
   %r = and i16 %y, %z
   ret i16 %r
 }
@@ -323,7 +323,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
   %r = and i8 %y, %z
   ret i8 %r
 }
@@ -345,7 +345,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
   %r = and i8 %y, %z
   ret i8 %r
 }
@@ -368,7 +368,7 @@
 ; CHECK-NEXT:    ands r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
   %r = and i8 %y, %z
   ret i8 %r
 }
@@ -380,7 +380,7 @@
 ; CHECK-NEXT:    ands r1, r3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
   %r = and i64 %y, %z
   ret i64 %r
 }
@@ -398,7 +398,7 @@
 ; CHECK-NEXT:    ands r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
   %r = and i64 %y, %z
   ret i64 %r
 }
@@ -417,7 +417,7 @@
 ; CHECK-NEXT:    ands r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
   %r = and i64 %y, %z
   ret i64 %r
 }
@@ -430,7 +430,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
   ret i32 %z
 }
 
@@ -446,7 +446,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
   ret i32 %z
 }
 
@@ -463,7 +463,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
   ret i32 %z
 }
 
@@ -479,7 +479,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
   ret i16 %z
 }
 
@@ -497,7 +497,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
   ret i16 %z
 }
 
@@ -516,7 +516,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
   ret i16 %z
 }
 
@@ -534,7 +534,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
   ret i8 %z
 }
 
@@ -554,7 +554,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
   ret i8 %z
 }
 
@@ -575,7 +575,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
   ret i8 %z
 }
 
@@ -584,7 +584,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
   ret i64 %z
 }
 
@@ -599,7 +599,7 @@
 ; CHECK-NEXT:    orrs r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
   ret i64 %z
 }
 
@@ -615,7 +615,7 @@
 ; CHECK-NEXT:    orrs r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
   ret i64 %z
 }
 
@@ -628,7 +628,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
   %r = or i32 %y, %z
   ret i32 %r
 }
@@ -646,7 +646,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
   %r = or i32 %y, %z
   ret i32 %r
 }
@@ -665,7 +665,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
   %r = or i32 %y, %z
   ret i32 %r
 }
@@ -683,7 +683,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
   %r = or i16 %y, %z
   ret i16 %r
 }
@@ -703,7 +703,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
   %r = or i16 %y, %z
   ret i16 %r
 }
@@ -724,7 +724,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
   %r = or i16 %y, %z
   ret i16 %r
 }
@@ -744,7 +744,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
   %r = or i8 %y, %z
   ret i8 %r
 }
@@ -766,7 +766,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
   %r = or i8 %y, %z
   ret i8 %r
 }
@@ -789,7 +789,7 @@
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
   %r = or i8 %y, %z
   ret i8 %r
 }
@@ -801,7 +801,7 @@
 ; CHECK-NEXT:    orrs r1, r3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
   %r = or i64 %y, %z
   ret i64 %r
 }
@@ -819,7 +819,7 @@
 ; CHECK-NEXT:    orrs r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
   %r = or i64 %y, %z
   ret i64 %r
 }
@@ -838,7 +838,7 @@
 ; CHECK-NEXT:    orrs r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
   %r = or i64 %y, %z
   ret i64 %r
 }
@@ -851,7 +851,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
   ret i32 %z
 }
 
@@ -867,7 +867,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
   ret i32 %z
 }
 
@@ -884,7 +884,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
   ret i32 %z
 }
 
@@ -900,7 +900,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
   ret i16 %z
 }
 
@@ -918,7 +918,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
   ret i16 %z
 }
 
@@ -937,7 +937,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
   ret i16 %z
 }
 
@@ -955,7 +955,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
   ret i8 %z
 }
 
@@ -975,7 +975,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
   ret i8 %z
 }
 
@@ -996,7 +996,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
   ret i8 %z
 }
 
@@ -1005,7 +1005,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
   ret i64 %z
 }
 
@@ -1020,7 +1020,7 @@
 ; CHECK-NEXT:    eors r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
   ret i64 %z
 }
 
@@ -1036,7 +1036,7 @@
 ; CHECK-NEXT:    eors r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
   ret i64 %z
 }
 
@@ -1049,7 +1049,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
   %r = xor i32 %y, %z
   ret i32 %r
 }
@@ -1067,7 +1067,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
   %r = xor i32 %y, %z
   ret i32 %r
 }
@@ -1086,7 +1086,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
   %r = xor i32 %y, %z
   ret i32 %r
 }
@@ -1104,7 +1104,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
   %r = xor i16 %y, %z
   ret i16 %r
 }
@@ -1124,7 +1124,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
   %r = xor i16 %y, %z
   ret i16 %r
 }
@@ -1145,7 +1145,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
   %r = xor i16 %y, %z
   ret i16 %r
 }
@@ -1165,7 +1165,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
   %r = xor i8 %y, %z
   ret i8 %r
 }
@@ -1187,7 +1187,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
   %r = xor i8 %y, %z
   ret i8 %r
 }
@@ -1210,7 +1210,7 @@
 ; CHECK-NEXT:    eors r0, r1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
   %r = xor i8 %y, %z
   ret i8 %r
 }
@@ -1222,7 +1222,7 @@
 ; CHECK-NEXT:    eors r1, r3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
   %r = xor i64 %y, %z
   ret i64 %r
 }
@@ -1240,7 +1240,7 @@
 ; CHECK-NEXT:    eors r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
   %r = xor i64 %y, %z
   ret i64 %r
 }
@@ -1259,44 +1259,44 @@
 ; CHECK-NEXT:    eors r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
   %r = xor i64 %y, %z
   ret i64 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
-declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
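For reference, a minimal standalone sketch (not part of the patch) of what the renamed bitwise reductions above compute; since and/or/xor are associative and commutative, the intrinsic simply folds the operation across all lanes and returns a scalar of the element type:

; Hypothetical example using the renamed form exercised by the tests above.
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)

define i32 @xor_reduce_sketch(<4 x i32> %v) {
entry:
  ; Equivalent to xor(xor(xor(v0, v1), v2), v3), in any association order.
  %r = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %v)
  ret i32 %r
}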
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
index 77f0c77..8ead4f5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    vadd.f32 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
 
@@ -30,7 +30,7 @@
 ; CHECK-NOFP-NEXT:    vadd.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %y, <4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
 
@@ -56,7 +56,7 @@
 ; CHECK-NOFP-NEXT:    vadd.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %y, <8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
 
@@ -71,7 +71,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half %y, <2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v2f16(half %y, <2 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -102,7 +102,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -139,7 +139,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half %y, <8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -189,7 +189,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half %y, <16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fadd.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -200,7 +200,7 @@
 ; CHECK-NEXT:    vadd.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double %y, <1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fadd.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
 
@@ -211,7 +211,7 @@
 ; CHECK-NEXT:    vadd.f64 d0, d2, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %y, <2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
 
@@ -224,7 +224,7 @@
 ; CHECK-NEXT:    vadd.f64 d0, d4, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %y, <4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
 
@@ -235,7 +235,7 @@
 ; CHECK-NEXT:    vadd.f32 s0, s4, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x)
+  %z = call float @llvm.vector.reduce.fadd.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
 
@@ -248,7 +248,7 @@
 ; CHECK-NEXT:    vadd.f32 s0, s4, s3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %y, <4 x float> %x)
+  %z = call float @llvm.vector.reduce.fadd.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
 
@@ -265,7 +265,7 @@
 ; CHECK-NEXT:    vadd.f32 s0, s0, s7
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %y, <8 x float> %x)
+  %z = call float @llvm.vector.reduce.fadd.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
 
@@ -283,7 +283,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x)
+  %z = call half @llvm.vector.reduce.fadd.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -308,7 +308,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half %y, <8 x half> %x)
+  %z = call half @llvm.vector.reduce.fadd.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -345,7 +345,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half %y, <16 x half> %x)
+  %z = call half @llvm.vector.reduce.fadd.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -356,7 +356,7 @@
 ; CHECK-NEXT:    vadd.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double %y, <1 x double> %x)
+  %z = call double @llvm.vector.reduce.fadd.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
 
@@ -367,7 +367,7 @@
 ; CHECK-NEXT:    vadd.f64 d0, d2, d1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %y, <2 x double> %x)
+  %z = call double @llvm.vector.reduce.fadd.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
 
@@ -380,17 +380,17 @@
 ; CHECK-NEXT:    vadd.f64 d0, d0, d3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %y, <4 x double> %x)
+  %z = call double @llvm.vector.reduce.fadd.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
 
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half, <16 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half, <2 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half, <8 x half>)
+declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
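The fadd reductions above keep an explicit scalar start value as the first operand. A minimal sketch (not part of the patch) of the two forms the tests exercise: with the fast flag the lanes may be reassociated, while without fast-math flags the accumulation is sequential starting from the start value:

; Hypothetical examples using the renamed fadd reduction.
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)

define float @fadd_reduce_fast(float %start, <4 x float> %v) {
entry:
  %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %start, <4 x float> %v)
  ret float %r
}

define float @fadd_reduce_ordered(float %start, <4 x float> %v) {
entry:
  ; No fast-math flags: ((((start + v0) + v1) + v2) + v3), in lane order.
  %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %start, <4 x float> %v)
  ret float %r
}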
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index a83fa68..45c6972 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    vminnm.f32 s0, s0, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   ret float %z
 }
 
@@ -27,7 +27,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   ret float %z
 }
 
@@ -60,7 +60,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f32 s0, s2, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   ret float %z
 }
 
@@ -83,7 +83,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   ret half %z
 }
 
@@ -112,7 +112,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   ret half %z
 }
 
@@ -170,7 +170,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   ret half %z
 }
 
@@ -179,7 +179,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   ret double %z
 }
 
@@ -189,7 +189,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d0, d1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   ret double %z
 }
 
@@ -205,7 +205,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d0, d4
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   ret double %z
 }
 
@@ -215,7 +215,7 @@
 ; CHECK-NEXT:    vminnm.f32 s0, s0, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   ret float %z
 }
 
@@ -234,7 +234,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   ret float %z
 }
 
@@ -258,7 +258,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   ret float %z
 }
 
@@ -281,7 +281,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   ret half %z
 }
 
@@ -310,7 +310,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   ret half %z
 }
 
@@ -352,7 +352,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   ret half %z
 }
 
@@ -361,7 +361,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   ret double %z
 }
 
@@ -371,7 +371,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d0, d1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   ret double %z
 }
 
@@ -383,7 +383,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d0, d4
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   ret double %z
 }
 
@@ -394,7 +394,7 @@
 ; CHECK-NEXT:    vminnm.f32 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   %c = fcmp fast olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -417,7 +417,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   %c = fcmp fast olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -453,7 +453,7 @@
 ; CHECK-NOFP-NEXT:    vminnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   %c = fcmp fast olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -485,7 +485,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -503,7 +503,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -542,7 +542,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -610,7 +610,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp fast olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -623,7 +623,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   %c = fcmp fast olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -636,7 +636,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d2, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   %c = fcmp fast olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -655,7 +655,7 @@
 ; CHECK-NEXT:    vminnm.f64 d0, d4, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   %c = fcmp fast olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -670,7 +670,7 @@
 ; CHECK-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
   %c = fcmp olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -697,7 +697,7 @@
 ; CHECK-NOFP-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
   %c = fcmp olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -729,7 +729,7 @@
 ; CHECK-NOFP-NEXT:    vselgt.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
   %c = fcmp olt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -765,7 +765,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -808,7 +808,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -864,7 +864,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
   %c = fcmp olt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -879,7 +879,7 @@
 ; CHECK-NEXT:    vselgt.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
   %c = fcmp olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -894,7 +894,7 @@
 ; CHECK-NEXT:    vselgt.f64 d0, d2, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
   %c = fcmp olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -911,7 +911,7 @@
 ; CHECK-NEXT:    vselgt.f64 d0, d4, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
   %c = fcmp olt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -923,7 +923,7 @@
 ; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   ret float %z
 }
 
@@ -942,7 +942,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   ret float %z
 }
 
@@ -974,7 +974,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s2, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   ret float %z
 }
 
@@ -997,7 +997,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   ret half %z
 }
 
@@ -1026,7 +1026,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   ret half %z
 }
 
@@ -1084,7 +1084,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   ret half %z
 }
 
@@ -1093,7 +1093,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   ret double %z
 }
 
@@ -1103,7 +1103,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d0, d1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   ret double %z
 }
 
@@ -1119,7 +1119,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d0, d4
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   ret double %z
 }
 
@@ -1129,7 +1129,7 @@
 ; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   ret float %z
 }
 
@@ -1148,7 +1148,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s3
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   ret float %z
 }
 
@@ -1172,7 +1172,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   ret float %z
 }
 
@@ -1195,7 +1195,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   ret half %z
 }
 
@@ -1224,7 +1224,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   ret half %z
 }
 
@@ -1266,7 +1266,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   ret half %z
 }
 
@@ -1275,7 +1275,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   ret double %z
 }
 
@@ -1285,7 +1285,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d0, d1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   ret double %z
 }
 
@@ -1297,7 +1297,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d0, d4
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   ret double %z
 }
 
@@ -1308,7 +1308,7 @@
 ; CHECK-NEXT:    vmaxnm.f32 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   %c = fcmp fast ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1331,7 +1331,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   %c = fcmp fast ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1367,7 +1367,7 @@
 ; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   %c = fcmp fast ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1384,7 +1384,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1417,7 +1417,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1456,7 +1456,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1524,7 +1524,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp fast ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1537,7 +1537,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   %c = fcmp fast ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1550,7 +1550,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d2, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   %c = fcmp fast ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1569,7 +1569,7 @@
 ; CHECK-NEXT:    vmaxnm.f64 d0, d4, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   %c = fcmp fast ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1584,7 +1584,7 @@
 ; CHECK-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
   %c = fcmp ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1611,7 +1611,7 @@
 ; CHECK-NOFP-NEXT:    vselgt.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
   %c = fcmp ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1643,7 +1643,7 @@
 ; CHECK-NOFP-NEXT:    vselgt.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
   %c = fcmp ogt float %y, %z
   %r = select i1 %c, float %y, float %z
   ret float %r
@@ -1679,7 +1679,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1722,7 +1722,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1778,7 +1778,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
   %c = fcmp ogt half %y, %z
   %r = select i1 %c, half %y, half %z
   store half %r, half* %yy
@@ -1793,7 +1793,7 @@
 ; CHECK-NEXT:    vselgt.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
   %c = fcmp ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1808,7 +1808,7 @@
 ; CHECK-NEXT:    vselgt.f64 d0, d2, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
   %c = fcmp ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
@@ -1825,29 +1825,29 @@
 ; CHECK-NEXT:    vselgt.f64 d0, d4, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
   %c = fcmp ogt double %y, %z
   %r = select i1 %c, double %y, double %z
   ret double %r
 }
 
-declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
-declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>)
-declare half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>)
-declare half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half>)
+declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
+declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
+declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
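Unlike fadd/fmul, the fmin/fmax reductions above take only the vector operand, with no start value. A minimal sketch (not part of the patch), assuming the usual llvm.minnum-style treatment of NaNs for the fmin case:

; Hypothetical example using the renamed fmin reduction.
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

define float @fmin_reduce_sketch(<4 x float> %v) {
entry:
  %r = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  ret float %r
}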
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
index 89d1546..940e2e2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fmul.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    vmul.f32 s0, s4, s0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %y, <2 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
 
@@ -30,7 +30,7 @@
 ; CHECK-NOFP-NEXT:    vmul.f32 s0, s4, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %y, <4 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
 
@@ -56,7 +56,7 @@
 ; CHECK-NOFP-NEXT:    vmul.f32 s0, s8, s0
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
-  %z = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %y, <8 x float> %x)
+  %z = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
 
@@ -71,7 +71,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v2f16(half %y, <2 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v2f16(half %y, <2 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -102,7 +102,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half %y, <4 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -139,7 +139,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v8f16(half %y, <8 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -189,7 +189,7 @@
 ; CHECK-NOFP-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v16f16(half %y, <16 x half> %x)
+  %z = call fast half @llvm.vector.reduce.fmul.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -200,7 +200,7 @@
 ; CHECK-NEXT:    vmul.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double %y, <1 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmul.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
 
@@ -211,7 +211,7 @@
 ; CHECK-NEXT:    vmul.f64 d0, d2, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %y, <2 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
 
@@ -224,7 +224,7 @@
 ; CHECK-NEXT:    vmul.f64 d0, d4, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %y, <4 x double> %x)
+  %z = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
 
@@ -235,7 +235,7 @@
 ; CHECK-NEXT:    vmul.f32 s0, s4, s1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %y, <2 x float> %x)
+  %z = call float @llvm.vector.reduce.fmul.f32.v2f32(float %y, <2 x float> %x)
   ret float %z
 }
 
@@ -248,7 +248,7 @@
 ; CHECK-NEXT:    vmul.f32 s0, s4, s3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %y, <4 x float> %x)
+  %z = call float @llvm.vector.reduce.fmul.f32.v4f32(float %y, <4 x float> %x)
   ret float %z
 }
 
@@ -265,7 +265,7 @@
 ; CHECK-NEXT:    vmul.f32 s0, s0, s7
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %y, <8 x float> %x)
+  %z = call float @llvm.vector.reduce.fmul.f32.v8f32(float %y, <8 x float> %x)
   ret float %z
 }
 
@@ -280,7 +280,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v2f16(half %y, <2 x half> %x)
+  %z = call half @llvm.vector.reduce.fmul.f16.v2f16(half %y, <2 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -299,7 +299,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half %y, <4 x half> %x)
+  %z = call half @llvm.vector.reduce.fmul.f16.v4f16(half %y, <4 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -324,7 +324,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v8f16(half %y, <8 x half> %x)
+  %z = call half @llvm.vector.reduce.fmul.f16.v8f16(half %y, <8 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -361,7 +361,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %y = load half, half* %yy
-  %z = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v16f16(half %y, <16 x half> %x)
+  %z = call half @llvm.vector.reduce.fmul.f16.v16f16(half %y, <16 x half> %x)
   store half %z, half* %yy
   ret void
 }
@@ -372,7 +372,7 @@
 ; CHECK-NEXT:    vmul.f64 d0, d1, d0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double %y, <1 x double> %x)
+  %z = call double @llvm.vector.reduce.fmul.f64.v1f64(double %y, <1 x double> %x)
   ret double %z
 }
 
@@ -383,7 +383,7 @@
 ; CHECK-NEXT:    vmul.f64 d0, d2, d1
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %y, <2 x double> %x)
+  %z = call double @llvm.vector.reduce.fmul.f64.v2f64(double %y, <2 x double> %x)
   ret double %z
 }
 
@@ -396,17 +396,17 @@
 ; CHECK-NEXT:    vmul.f64 d0, d0, d3
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %y, <4 x double> %x)
+  %z = call double @llvm.vector.reduce.fmul.f64.v4f64(double %y, <4 x double> %x)
   ret double %z
 }
 
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double, <4 x double>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float, <8 x float>)
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v16f16(half, <16 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v2f16(half, <2 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half, <4 x half>)
-declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v8f16(half, <8 x half>)
+declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare half @llvm.vector.reduce.fmul.f16.v16f16(half, <16 x half>)
+declare half @llvm.vector.reduce.fmul.f16.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fmul.f16.v8f16(half, <8 x half>)
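Like fadd, the fmul reductions above carry a scalar start value as the first operand; passing 1.0 yields a plain product of the lanes. A minimal sketch (not part of the patch):

; Hypothetical example using the renamed fmul reduction.
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

define float @fmul_reduce_sketch(<4 x float> %v) {
entry:
  %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %v)
  ret float %r
}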
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
index 2862779..2544474 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
@@ -65,7 +65,7 @@
   %0 = getelementptr inbounds i32, i32* %x, i32 %index
   %1 = bitcast i32* %0 to <4 x i32>*
   %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
-  %2 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %wide.load)
+  %2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load)
   %3 = add i32 %2, %vec.phi
   %index.next = add i32 %index, 4
   %4 = icmp eq i32 %index.next, %n.vec
@@ -167,7 +167,7 @@
   br i1 %3, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %2)
+  %4 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %2)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -267,7 +267,7 @@
   br i1 %3, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %2)
+  %4 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %2)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -367,7 +367,7 @@
   br i1 %3, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %2)
+  %4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %2)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -467,7 +467,7 @@
   br i1 %3, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %2)
+  %4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %2)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -568,7 +568,7 @@
   br i1 %3, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %4 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %2)
+  %4 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %2)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -665,7 +665,7 @@
   br i1 %3, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %4 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %2)
+  %4 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %2)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -762,7 +762,7 @@
   br i1 %4, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %5 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %3)
+  %5 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %3)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -852,7 +852,7 @@
   %0 = getelementptr inbounds i32, i32* %x, i32 %index
   %1 = bitcast i32* %0 to <4 x i32>*
   %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
-  %l5 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %wide.load)
+  %l5 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %wide.load)
   %2 = icmp slt i32 %vec.phi, %l5
   %3 = select i1 %2, i32 %vec.phi, i32 %l5
   %index.next = add i32 %index, 4
@@ -958,7 +958,7 @@
   br i1 %4, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %5 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %3)
+  %5 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %3)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -1048,7 +1048,7 @@
   %0 = getelementptr inbounds i32, i32* %x, i32 %index
   %1 = bitcast i32* %0 to <4 x i32>*
   %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
-  %l5 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %wide.load)
+  %l5 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %wide.load)
   %2 = icmp sgt i32 %vec.phi, %l5
   %3 = select i1 %2, i32 %vec.phi, i32 %l5
   %index.next = add i32 %index, 4
@@ -1154,7 +1154,7 @@
   br i1 %4, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %5 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %3)
+  %5 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %3)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -1244,7 +1244,7 @@
   %0 = getelementptr inbounds i32, i32* %x, i32 %index
   %1 = bitcast i32* %0 to <4 x i32>*
   %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
-  %l5 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %wide.load)
+  %l5 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %wide.load)
   %2 = icmp ult i32 %vec.phi, %l5
   %3 = select i1 %2, i32 %vec.phi, i32 %l5
   %index.next = add i32 %index, 4
@@ -1350,7 +1350,7 @@
   br i1 %4, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %5 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %3)
+  %5 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %3)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -1440,7 +1440,7 @@
   %0 = getelementptr inbounds i32, i32* %x, i32 %index
   %1 = bitcast i32* %0 to <4 x i32>*
   %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
-  %l5 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %wide.load)
+  %l5 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %wide.load)
   %2 = icmp ugt i32 %vec.phi, %l5
   %3 = select i1 %2, i32 %vec.phi, i32 %l5
   %index.next = add i32 %index, 4
@@ -1553,7 +1553,7 @@
   br i1 %4, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %5 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %3)
+  %5 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %3)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -1658,7 +1658,7 @@
   br i1 %4, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body
-  %5 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %3)
+  %5 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %3)
   %cmp.n = icmp eq i32 %n.vec, %n
   br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1
 
@@ -1722,7 +1722,7 @@
   %1 = bitcast i32* %0 to <4 x i32>*
   %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
   %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer
-  %3 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %2)
+  %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2)
   %4 = add i32 %3, %vec.phi
   %index.next = add i32 %index, 4
   %5 = icmp eq i32 %index.next, %n.vec
@@ -1777,7 +1777,7 @@
   %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
   %4 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load
   %5 = select <4 x i1> %active.lane.mask, <4 x i32> %4, <4 x i32> zeroinitializer
-  %6 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %5)
+  %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5)
   %7 = add i32 %6, %vec.phi
   %index.next = add i32 %index, 4
   %8 = icmp eq i32 %index.next, %n.vec
@@ -1828,7 +1828,7 @@
   %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
   %2 = sext <8 x i16> %wide.masked.load to <8 x i32>
   %3 = select <8 x i1> %active.lane.mask, <8 x i32> %2, <8 x i32> zeroinitializer
-  %4 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %3)
+  %4 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3)
   %5 = add i32 %4, %vec.phi
   %index.next = add i32 %index, 8
   %6 = icmp eq i32 %index.next, %n.vec
@@ -1885,7 +1885,7 @@
   %5 = sext <8 x i16> %wide.masked.load14 to <8 x i32>
   %6 = mul nsw <8 x i32> %5, %2
   %7 = select <8 x i1> %active.lane.mask, <8 x i32> %6, <8 x i32> zeroinitializer
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %7)
   %9 = add i32 %8, %vec.phi
   %index.next = add i32 %index, 8
   %10 = icmp eq i32 %index.next, %n.vec
@@ -1936,7 +1936,7 @@
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
   %2 = zext <16 x i8> %wide.masked.load to <16 x i32>
   %3 = select <16 x i1> %active.lane.mask, <16 x i32> %2, <16 x i32> zeroinitializer
-  %4 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %3)
+  %4 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
   %5 = add i32 %4, %vec.phi
   %index.next = add i32 %index, 16
   %6 = icmp eq i32 %index.next, %n.vec
@@ -1993,7 +1993,7 @@
   %5 = zext <16 x i8> %wide.masked.load14 to <16 x i32>
   %6 = mul nuw nsw <16 x i32> %5, %2
   %7 = select <16 x i1> %active.lane.mask, <16 x i32> %6, <16 x i32> zeroinitializer
-  %8 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %7)
+  %8 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %7)
   %9 = add i32 %8, %vec.phi
   %index.next = add i32 %index, 16
   %10 = icmp eq i32 %index.next, %n.vec
@@ -2043,7 +2043,7 @@
   %1 = bitcast i16* %0 to <8 x i16>*
   %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
   %2 = select <8 x i1> %active.lane.mask, <8 x i16> %wide.masked.load, <8 x i16> zeroinitializer
-  %3 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %2)
+  %3 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %2)
   %4 = add i16 %3, %vec.phi
   %index.next = add i32 %index, 8
   %5 = icmp eq i32 %index.next, %n.vec
@@ -2098,7 +2098,7 @@
   %wide.masked.load16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %3, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef)
   %4 = mul <8 x i16> %wide.masked.load16, %wide.masked.load
   %5 = select <8 x i1> %active.lane.mask, <8 x i16> %4, <8 x i16> zeroinitializer
-  %6 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %5)
+  %6 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %5)
   %7 = add i16 %6, %vec.phi
   %index.next = add i32 %index, 8
   %8 = icmp eq i32 %index.next, %n.vec
@@ -2149,7 +2149,7 @@
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
   %2 = zext <16 x i8> %wide.masked.load to <16 x i16>
   %3 = select <16 x i1> %active.lane.mask, <16 x i16> %2, <16 x i16> zeroinitializer
-  %4 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %3)
+  %4 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %3)
   %5 = add i16 %4, %vec.phi
   %index.next = add i32 %index, 16
   %6 = icmp eq i32 %index.next, %n.vec
@@ -2206,7 +2206,7 @@
   %5 = zext <16 x i8> %wide.masked.load18 to <16 x i16>
   %6 = mul nuw <16 x i16> %5, %2
   %7 = select <16 x i1> %active.lane.mask, <16 x i16> %6, <16 x i16> zeroinitializer
-  %8 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %7)
+  %8 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %7)
   %9 = add i16 %8, %vec.phi
   %index.next = add i32 %index, 16
   %10 = icmp eq i32 %index.next, %n.vec
@@ -2256,7 +2256,7 @@
   %1 = bitcast i8* %0 to <16 x i8>*
   %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
   %2 = select <16 x i1> %active.lane.mask, <16 x i8> %wide.masked.load, <16 x i8> zeroinitializer
-  %3 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %2)
+  %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %2)
   %4 = add i8 %3, %vec.phi
   %index.next = add i32 %index, 16
   %5 = icmp eq i32 %index.next, %n.vec
@@ -2311,7 +2311,7 @@
   %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef)
   %4 = mul <16 x i8> %wide.masked.load15, %wide.masked.load
   %5 = select <16 x i1> %active.lane.mask, <16 x i8> %4, <16 x i8> zeroinitializer
-  %6 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %5)
+  %6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %5)
   %7 = add i8 %6, %vec.phi
   %index.next = add i32 %index, 16
   %8 = icmp eq i32 %index.next, %n.vec
@@ -2364,7 +2364,7 @@
   %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
   %2 = sext <4 x i32> %wide.masked.load to <4 x i64>
   %3 = select <4 x i1> %active.lane.mask, <4 x i64> %2, <4 x i64> zeroinitializer
-  %4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %3)
+  %4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3)
   %5 = add i64 %4, %vec.phi
   %index.next = add i32 %index, 4
   %6 = icmp eq i32 %index.next, %n.vec
@@ -2423,7 +2423,7 @@
   %5 = sext <4 x i32> %wide.masked.load14 to <4 x i64>
   %6 = mul nsw <4 x i64> %5, %2
   %7 = select <4 x i1> %active.lane.mask, <4 x i64> %6, <4 x i64> zeroinitializer
-  %8 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %7)
+  %8 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %7)
   %9 = add i64 %8, %vec.phi
   %index.next = add i32 %index, 4
   %10 = icmp eq i32 %index.next, %n.vec
@@ -2482,7 +2482,7 @@
   %5 = sext <8 x i16> %wide.masked.load14 to <8 x i64>
   %6 = mul nsw <8 x i64> %5, %2
   %7 = select <8 x i1> %active.lane.mask, <8 x i64> %6, <8 x i64> zeroinitializer
-  %8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %7)
+  %8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %7)
   %9 = add i64 %8, %vec.phi
   %index.next = add i32 %index, 8
   %10 = icmp eq i32 %index.next, %n.vec
@@ -2497,26 +2497,26 @@
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2
 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1
 declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>) #3
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #3
 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) #1
 declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #2
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>) #3
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) #3
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>) #3
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) #3
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>) #3
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>) #3
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #3
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #3
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #3
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #3
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) #3
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) #3
 
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
index b83b51b..ee15f82 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %m = mul <4 x i32> %x, %y
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   ret i32 %z
 }
 
@@ -21,7 +21,7 @@
   %xx = zext <4 x i32> %x to <4 x i64>
   %yy = zext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m)
   ret i64 %z
 }
 
@@ -34,7 +34,7 @@
   %xx = sext <4 x i32> %x to <4 x i64>
   %yy = sext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m)
   ret i64 %z
 }
 
@@ -53,7 +53,7 @@
   %xx = zext <2 x i32> %x to <2 x i64>
   %yy = zext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -72,7 +72,7 @@
   %xx = sext <2 x i32> %x to <2 x i64>
   %yy = sext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -85,7 +85,7 @@
   %xx = zext <8 x i16> %x to <8 x i32>
   %yy = zext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m)
   ret i32 %z
 }
 
@@ -98,7 +98,7 @@
   %xx = sext <8 x i16> %x to <8 x i32>
   %yy = sext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m)
   ret i32 %z
 }
 
@@ -113,7 +113,7 @@
   %xx = zext <4 x i16> %x to <4 x i32>
   %yy = zext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   ret i32 %z
 }
 
@@ -128,7 +128,7 @@
   %xx = sext <4 x i16> %x to <4 x i32>
   %yy = sext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   ret i32 %z
 }
 
@@ -140,7 +140,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %m = mul <8 x i16> %x, %y
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m)
   ret i16 %z
 }
 
@@ -153,7 +153,7 @@
   %xx = zext <8 x i16> %x to <8 x i64>
   %yy = zext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m)
   ret i64 %z
 }
 
@@ -166,7 +166,7 @@
   %xx = sext <8 x i16> %x to <8 x i64>
   %yy = sext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m)
   ret i64 %z
 }
 
@@ -180,7 +180,7 @@
   %yy = zext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %ma = zext <8 x i32> %m to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma)
   ret i64 %z
 }
 
@@ -194,7 +194,7 @@
   %yy = sext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %ma = sext <8 x i32> %m to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma)
   ret i64 %z
 }
 
@@ -207,7 +207,7 @@
   %xx = sext <8 x i16> %x to <8 x i32>
   %m = mul <8 x i32> %xx, %xx
   %ma = zext <8 x i32> %m to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma)
   ret i64 %z
 }
 
@@ -228,7 +228,7 @@
   %xx = zext <2 x i16> %x to <2 x i64>
   %yy = zext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -250,7 +250,7 @@
   %xx = sext <2 x i16> %x to <2 x i64>
   %yy = sext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -263,7 +263,7 @@
   %xx = zext <16 x i8> %x to <16 x i32>
   %yy = zext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m)
   ret i32 %z
 }
 
@@ -276,7 +276,7 @@
   %xx = sext <16 x i8> %x to <16 x i32>
   %yy = sext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m)
   ret i32 %z
 }
 
@@ -290,7 +290,7 @@
   %yy = zext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %ma = zext <16 x i16> %m to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma)
   ret i32 %z
 }
 
@@ -304,7 +304,7 @@
   %yy = sext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %ma = sext <16 x i16> %m to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma)
   ret i32 %z
 }
 
@@ -317,7 +317,7 @@
   %xx = sext <16 x i8> %x to <16 x i16>
   %m = mul <16 x i16> %xx, %xx
   %ma = zext <16 x i16> %m to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma)
   ret i32 %z
 }
 
@@ -333,7 +333,7 @@
   %xx = zext <4 x i8> %x to <4 x i32>
   %yy = zext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   ret i32 %z
 }
 
@@ -350,7 +350,7 @@
   %xx = sext <4 x i8> %x to <4 x i32>
   %yy = sext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   ret i32 %z
 }
 
@@ -364,7 +364,7 @@
   %xx = zext <16 x i8> %x to <16 x i16>
   %yy = zext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m)
   ret i16 %z
 }
 
@@ -378,7 +378,7 @@
   %xx = sext <16 x i8> %x to <16 x i16>
   %yy = sext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m)
   ret i16 %z
 }
 
@@ -394,7 +394,7 @@
   %xx = zext <8 x i8> %x to <8 x i16>
   %yy = zext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m)
   ret i16 %z
 }
 
@@ -410,7 +410,7 @@
   %xx = sext <8 x i8> %x to <8 x i16>
   %yy = sext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m)
   ret i16 %z
 }
 
@@ -422,7 +422,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %m = mul <16 x i8> %x, %y
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %m)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %m)
   ret i8 %z
 }
 
@@ -636,7 +636,7 @@
   %xx = zext <16 x i8> %x to <16 x i64>
   %yy = zext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m)
   ret i64 %z
 }
 
@@ -803,7 +803,7 @@
   %xx = sext <16 x i8> %x to <16 x i64>
   %yy = sext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m)
   ret i64 %z
 }
 
@@ -826,7 +826,7 @@
   %xx = zext <2 x i8> %x to <2 x i64>
   %yy = zext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -848,7 +848,7 @@
   %xx = sext <2 x i8> %x to <2 x i64>
   %yy = sext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -879,7 +879,7 @@
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %m = mul <2 x i64> %x, %y
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   ret i64 %z
 }
 
@@ -890,7 +890,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %m = mul <4 x i32> %x, %y
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -904,7 +904,7 @@
   %xx = zext <4 x i32> %x to <4 x i64>
   %yy = zext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -918,7 +918,7 @@
   %xx = sext <4 x i32> %x to <4 x i64>
   %yy = sext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -942,7 +942,7 @@
   %xx = zext <2 x i32> %x to <2 x i64>
   %yy = zext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -966,7 +966,7 @@
   %xx = sext <2 x i32> %x to <2 x i64>
   %yy = sext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -980,7 +980,7 @@
   %xx = zext <8 x i16> %x to <8 x i32>
   %yy = zext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -994,7 +994,7 @@
   %xx = sext <8 x i16> %x to <8 x i32>
   %yy = sext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1010,7 +1010,7 @@
   %xx = zext <4 x i16> %x to <4 x i32>
   %yy = zext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1026,7 +1026,7 @@
   %xx = sext <4 x i16> %x to <4 x i32>
   %yy = sext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1039,7 +1039,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %m = mul <8 x i16> %x, %y
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1053,7 +1053,7 @@
   %xx = zext <8 x i16> %x to <8 x i64>
   %yy = zext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1067,7 +1067,7 @@
   %xx = sext <8 x i16> %x to <8 x i64>
   %yy = sext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1082,7 +1082,7 @@
   %yy = zext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %ma = zext <8 x i32> %m to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1097,7 +1097,7 @@
   %yy = sext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %ma = sext <8 x i32> %m to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1111,7 +1111,7 @@
   %xx = sext <8 x i16> %x to <8 x i32>
   %m = mul <8 x i32> %xx, %xx
   %ma = zext <8 x i32> %m to <8 x i64>
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %ma)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %ma)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1137,7 +1137,7 @@
   %xx = zext <2 x i16> %x to <2 x i64>
   %yy = zext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1164,7 +1164,7 @@
   %xx = sext <2 x i16> %x to <2 x i64>
   %yy = sext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1178,7 +1178,7 @@
   %xx = zext <16 x i8> %x to <16 x i32>
   %yy = zext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1192,7 +1192,7 @@
   %xx = sext <16 x i8> %x to <16 x i32>
   %yy = sext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1207,7 +1207,7 @@
   %yy = zext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %ma = zext <16 x i16> %m to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1222,7 +1222,7 @@
   %yy = sext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %ma = sext <16 x i16> %m to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1236,7 +1236,7 @@
   %xx = sext <16 x i8> %x to <16 x i16>
   %m = mul <16 x i16> %xx, %xx
   %ma = zext <16 x i16> %m to <16 x i32>
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %ma)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ma)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1253,7 +1253,7 @@
   %xx = zext <4 x i8> %x to <4 x i32>
   %yy = zext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1271,7 +1271,7 @@
   %xx = sext <4 x i8> %x to <4 x i32>
   %yy = sext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %m)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1286,7 +1286,7 @@
   %xx = zext <16 x i8> %x to <16 x i16>
   %yy = zext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1301,7 +1301,7 @@
   %xx = sext <16 x i8> %x to <16 x i16>
   %yy = sext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %m)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1318,7 +1318,7 @@
   %xx = zext <8 x i8> %x to <8 x i16>
   %yy = zext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1335,7 +1335,7 @@
   %xx = sext <8 x i8> %x to <8 x i16>
   %yy = sext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %m)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %m)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1348,7 +1348,7 @@
 ; CHECK-NEXT:    bx lr
 entry:
   %m = mul <16 x i8> %x, %y
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %m)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %m)
   %r = add i8 %z, %a
   ret i8 %r
 }
@@ -1565,7 +1565,7 @@
   %xx = zext <16 x i8> %x to <16 x i64>
   %yy = zext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1737,7 +1737,7 @@
   %xx = sext <16 x i8> %x to <16 x i64>
   %yy = sext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1765,7 +1765,7 @@
   %xx = zext <2 x i8> %x to <2 x i64>
   %yy = zext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1792,7 +1792,7 @@
   %xx = sext <2 x i8> %x to <2 x i64>
   %yy = sext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1826,18 +1826,18 @@
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %m = mul <2 x i64> %x, %y
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %m)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %m)
   %r = add i64 %z, %a
   ret i64 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
index 02d1248..72462bb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll
@@ -11,7 +11,7 @@
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %m = mul <4 x i32> %x, %y
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -27,7 +27,7 @@
   %yy = zext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
   %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   ret i64 %z
 }
 
@@ -43,7 +43,7 @@
   %yy = sext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
   %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   ret i64 %z
 }
 
@@ -79,7 +79,7 @@
   %yy = zext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -115,7 +115,7 @@
   %yy = sext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -131,7 +131,7 @@
   %yy = zext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   ret i32 %z
 }
 
@@ -147,7 +147,7 @@
   %yy = sext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   ret i32 %z
 }
 
@@ -166,7 +166,7 @@
   %yy = zext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -185,7 +185,7 @@
   %yy = sext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -200,7 +200,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %m = mul <8 x i16> %x, %y
   %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   ret i16 %z
 }
 
@@ -216,7 +216,7 @@
   %yy = zext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
   %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -232,7 +232,7 @@
   %yy = sext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
   %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -249,7 +249,7 @@
   %m = mul <8 x i32> %xx, %yy
   %ma = zext <8 x i32> %m to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -266,7 +266,7 @@
   %m = mul <8 x i32> %xx, %yy
   %ma = sext <8 x i32> %m to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -282,7 +282,7 @@
   %m = mul <8 x i32> %xx, %xx
   %ma = zext <8 x i32> %m to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   ret i64 %z
 }
 
@@ -334,7 +334,7 @@
   %yy = zext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -385,7 +385,7 @@
   %yy = sext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -401,7 +401,7 @@
   %yy = zext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
   %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -417,7 +417,7 @@
   %yy = sext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
   %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -434,7 +434,7 @@
   %m = mul <16 x i16> %xx, %yy
   %ma = zext <16 x i16> %m to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -451,7 +451,7 @@
   %m = mul <16 x i16> %xx, %yy
   %ma = sext <16 x i16> %m to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -467,7 +467,7 @@
   %m = mul <16 x i16> %xx, %xx
   %ma = zext <16 x i16> %m to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   ret i32 %z
 }
 
@@ -487,7 +487,7 @@
   %yy = zext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -509,7 +509,7 @@
   %yy = sext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   ret i32 %z
 }
 
@@ -526,7 +526,7 @@
   %yy = zext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   ret i16 %z
 }
 
@@ -543,7 +543,7 @@
   %yy = sext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   ret i16 %z
 }
 
@@ -563,7 +563,7 @@
   %yy = zext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
   %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   ret i16 %z
 }
 
@@ -583,7 +583,7 @@
   %yy = sext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
   %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   ret i16 %z
 }
 
@@ -598,7 +598,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %m = mul <16 x i8> %x, %y
   %s = select <16 x i1> %c, <16 x i8> %m, <16 x i8> zeroinitializer
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
   ret i8 %z
 }
 
@@ -1010,7 +1010,7 @@
   %yy = zext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
   %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   ret i64 %z
 }
 
@@ -1353,7 +1353,7 @@
   %yy = sext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
   %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   ret i64 %z
 }
 
@@ -1405,7 +1405,7 @@
   %yy = zext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -1456,7 +1456,7 @@
   %yy = sext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -1509,7 +1509,7 @@
   %c = icmp eq <2 x i64> %b, zeroinitializer
   %m = mul <2 x i64> %x, %y
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   ret i64 %z
 }
 
@@ -1523,7 +1523,7 @@
   %c = icmp eq <4 x i32> %b, zeroinitializer
   %m = mul <4 x i32> %x, %y
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1540,7 +1540,7 @@
   %yy = zext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
   %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1557,7 +1557,7 @@
   %yy = sext <4 x i32> %y to <4 x i64>
   %m = mul <4 x i64> %xx, %yy
   %s = select <4 x i1> %c, <4 x i64> %m, <4 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1598,7 +1598,7 @@
   %yy = zext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1639,7 +1639,7 @@
   %yy = sext <2 x i32> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1656,7 +1656,7 @@
   %yy = zext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1673,7 +1673,7 @@
   %yy = sext <8 x i16> %y to <8 x i32>
   %m = mul <8 x i32> %xx, %yy
   %s = select <8 x i1> %c, <8 x i32> %m, <8 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1693,7 +1693,7 @@
   %yy = zext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1713,7 +1713,7 @@
   %yy = sext <4 x i16> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1729,7 +1729,7 @@
   %c = icmp eq <8 x i16> %b, zeroinitializer
   %m = mul <8 x i16> %x, %y
   %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -1746,7 +1746,7 @@
   %yy = zext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
   %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1763,7 +1763,7 @@
   %yy = sext <8 x i16> %y to <8 x i64>
   %m = mul <8 x i64> %xx, %yy
   %s = select <8 x i1> %c, <8 x i64> %m, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1781,7 +1781,7 @@
   %m = mul <8 x i32> %xx, %yy
   %ma = zext <8 x i32> %m to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1799,7 +1799,7 @@
   %m = mul <8 x i32> %xx, %yy
   %ma = sext <8 x i32> %m to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1816,7 +1816,7 @@
   %m = mul <8 x i32> %xx, %xx
   %ma = zext <8 x i32> %m to <8 x i64>
   %s = select <8 x i1> %c, <8 x i64> %ma, <8 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1873,7 +1873,7 @@
   %yy = zext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1929,7 +1929,7 @@
   %yy = sext <2 x i16> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -1946,7 +1946,7 @@
   %yy = zext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
   %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1963,7 +1963,7 @@
   %yy = sext <16 x i8> %y to <16 x i32>
   %m = mul <16 x i32> %xx, %yy
   %s = select <16 x i1> %c, <16 x i32> %m, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1981,7 +1981,7 @@
   %m = mul <16 x i16> %xx, %yy
   %ma = zext <16 x i16> %m to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -1999,7 +1999,7 @@
   %m = mul <16 x i16> %xx, %yy
   %ma = sext <16 x i16> %m to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -2016,7 +2016,7 @@
   %m = mul <16 x i16> %xx, %xx
   %ma = zext <16 x i16> %m to <16 x i32>
   %s = select <16 x i1> %c, <16 x i32> %ma, <16 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -2037,7 +2037,7 @@
   %yy = zext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -2060,7 +2060,7 @@
   %yy = sext <4 x i8> %y to <4 x i32>
   %m = mul <4 x i32> %xx, %yy
   %s = select <4 x i1> %c, <4 x i32> %m, <4 x i32> zeroinitializer
-  %z = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %s)
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
   %r = add i32 %z, %a
   ret i32 %r
 }
@@ -2078,7 +2078,7 @@
   %yy = zext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2096,7 +2096,7 @@
   %yy = sext <16 x i8> %y to <16 x i16>
   %m = mul <16 x i16> %xx, %yy
   %s = select <16 x i1> %c, <16 x i16> %m, <16 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2117,7 +2117,7 @@
   %yy = zext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
   %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2138,7 +2138,7 @@
   %yy = sext <8 x i8> %y to <8 x i16>
   %m = mul <8 x i16> %xx, %yy
   %s = select <8 x i1> %c, <8 x i16> %m, <8 x i16> zeroinitializer
-  %z = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %s)
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
   %r = add i16 %z, %a
   ret i16 %r
 }
@@ -2154,7 +2154,7 @@
   %c = icmp eq <16 x i8> %b, zeroinitializer
   %m = mul <16 x i8> %x, %y
   %s = select <16 x i1> %c, <16 x i8> %m, <16 x i8> zeroinitializer
-  %z = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %s)
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
   %r = add i8 %z, %a
   ret i8 %r
 }
@@ -2569,7 +2569,7 @@
   %yy = zext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
   %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -2917,7 +2917,7 @@
   %yy = sext <16 x i8> %y to <16 x i64>
   %m = mul <16 x i64> %xx, %yy
   %s = select <16 x i1> %c, <16 x i64> %m, <16 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -2974,7 +2974,7 @@
   %yy = zext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -3030,7 +3030,7 @@
   %yy = sext <2 x i8> %y to <2 x i64>
   %m = mul <2 x i64> %xx, %yy
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
@@ -3088,18 +3088,18 @@
   %c = icmp eq <2 x i64> %b, zeroinitializer
   %m = mul <2 x i64> %x, %y
   %s = select <2 x i1> %c, <2 x i64> %m, <2 x i64> zeroinitializer
-  %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %s)
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
   %r = add i64 %z, %a
   ret i64 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll
index 7510169..4ff6821 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mul.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x)
   ret i32 %z
 }
 
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %x)
   ret i32 %z
 }
 
@@ -42,7 +42,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %x)
   ret i32 %z
 }
 
@@ -58,7 +58,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %x)
   ret i16 %z
 }
 
@@ -76,7 +76,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %x)
   ret i16 %z
 }
 
@@ -95,7 +95,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %x)
   ret i16 %z
 }
 
@@ -113,7 +113,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %x)
   ret i8 %z
 }
 
@@ -133,7 +133,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %x)
   ret i8 %z
 }
 
@@ -154,7 +154,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %x)
   ret i8 %z
 }
 
@@ -163,7 +163,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> %x)
   ret i64 %z
 }
 
@@ -179,7 +179,7 @@
 ; CHECK-NEXT:    mla r1, r3, r1, r2
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x)
   ret i64 %z
 }
 
@@ -207,7 +207,7 @@
 ; CHECK-NEXT:    mla r1, r1, r6, r4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x)
   ret i64 %z
 }
 
@@ -220,7 +220,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %x)
   %r = mul i32 %y, %z
   ret i32 %r
 }
@@ -238,7 +238,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %x)
   %r = mul i32 %y, %z
   ret i32 %r
 }
@@ -257,7 +257,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %x)
+  %z = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %x)
   %r = mul i32 %y, %z
   ret i32 %r
 }
@@ -275,7 +275,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %x)
   %r = mul i16 %y, %z
   ret i16 %r
 }
@@ -295,7 +295,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %x)
   %r = mul i16 %y, %z
   ret i16 %r
 }
@@ -316,7 +316,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %x)
+  %z = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %x)
   %r = mul i16 %y, %z
   ret i16 %r
 }
@@ -336,7 +336,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %x)
   %r = mul i8 %y, %z
   ret i8 %r
 }
@@ -358,7 +358,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %x)
   %r = mul i8 %y, %z
   ret i8 %r
 }
@@ -381,7 +381,7 @@
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
 entry:
-  %z = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %x)
+  %z = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %x)
   %r = mul i8 %y, %z
   ret i8 %r
 }
@@ -397,7 +397,7 @@
 ; CHECK-NEXT:    mov r0, r12
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.mul.v1i64(<1 x i64> %x)
   %r = mul i64 %y, %z
   ret i64 %r
 }
@@ -420,7 +420,7 @@
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %x)
   %r = mul i64 %y, %z
   ret i64 %r
 }
@@ -453,20 +453,20 @@
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
 entry:
-  %z = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %x)
+  %z = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %x)
   %r = mul i64 %y, %z
   ret i64 %r
 }
 
-declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
-declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
-declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
+declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.mul.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
+declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll
index 80c8ae6..9502716 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv.ll
@@ -1,18 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
 
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
 
 define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) {
 ; CHECK-LABEL: vmaxv_s_v16i8:
@@ -20,7 +20,7 @@
 ; CHECK-NEXT:    mvn r0, #127
 ; CHECK-NEXT:    vmaxv.s8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
   ret i8 %r
 }
 
@@ -31,7 +31,7 @@
 ; CHECK-NEXT:    movt r0, #65535
 ; CHECK-NEXT:    vmaxv.s16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
   ret i16 %r
 }
 
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    mov.w r0, #-2147483648
 ; CHECK-NEXT:    vmaxv.s32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
   ret i32 %r
 }
 
@@ -51,7 +51,7 @@
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    vmaxv.u8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
   ret i8 %r
 }
 
@@ -61,7 +61,7 @@
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    vmaxv.u16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
   ret i16 %r
 }
 
@@ -71,7 +71,7 @@
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    vmaxv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
   ret i32 %r
 }
 
@@ -81,7 +81,7 @@
 ; CHECK-NEXT:    movs r0, #127
 ; CHECK-NEXT:    vminv.s8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
   ret i8 %r
 }
 
@@ -91,7 +91,7 @@
 ; CHECK-NEXT:    movw r0, #32767
 ; CHECK-NEXT:    vminv.s16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
   ret i16 %r
 }
 
@@ -101,7 +101,7 @@
 ; CHECK-NEXT:    mvn r0, #-2147483648
 ; CHECK-NEXT:    vminv.s32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
   ret i32 %r
 }
 
@@ -111,7 +111,7 @@
 ; CHECK-NEXT:    movs r0, #255
 ; CHECK-NEXT:    vminv.u8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
   ret i8 %r
 }
 
@@ -121,7 +121,7 @@
 ; CHECK-NEXT:    movw r0, #65535
 ; CHECK-NEXT:    vminv.u16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
   ret i16 %r
 }
 
@@ -131,7 +131,7 @@
 ; CHECK-NEXT:    mov.w r0, #-1
 ; CHECK-NEXT:    vminv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
   ret i32 %r
 }
 
@@ -142,7 +142,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmaxv.s8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
   %c = icmp sgt i8 %r, %s2
   %s = select i1 %c, i8 %r, i8 %s2
   ret i8 %s
@@ -157,7 +157,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, gt
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
   %rs = sext i8 %r to i32
   %c = icmp sgt i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -169,7 +169,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmaxv.s16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
   %c = icmp sgt i16 %r, %s2
   %s = select i1 %c, i16 %r, i16 %s2
   ret i16 %s
@@ -185,7 +185,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, gt
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
   %rs = sext i16 %r to i32
   %c = icmp sgt i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -197,7 +197,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmaxv.s32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
   %c = icmp sgt i32 %r, %s2
   %s = select i1 %c, i32 %r, i32 %s2
   ret i32 %s
@@ -208,7 +208,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmaxv.u8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
   %c = icmp ugt i8 %r, %s2
   %s = select i1 %c, i8 %r, i8 %s2
   ret i8 %s
@@ -223,7 +223,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, hi
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
   %rs = zext i8 %r to i32
   %c = icmp ugt i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -235,7 +235,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmaxv.u16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
   %c = icmp ugt i16 %r, %s2
   %s = select i1 %c, i16 %r, i16 %s2
   ret i16 %s
@@ -250,7 +250,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, hi
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
   %rs = zext i16 %r to i32
   %c = icmp ugt i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -262,7 +262,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmaxv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
   %c = icmp ugt i32 %r, %s2
   %s = select i1 %c, i32 %r, i32 %s2
   ret i32 %s
@@ -273,7 +273,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vminv.s8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
   %c = icmp slt i8 %r, %s2
   %s = select i1 %c, i8 %r, i8 %s2
   ret i8 %s
@@ -288,7 +288,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, lt
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
   %rs = sext i8 %r to i32
   %c = icmp slt i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -300,7 +300,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vminv.s16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
   %c = icmp slt i16 %r, %s2
   %s = select i1 %c, i16 %r, i16 %s2
   ret i16 %s
@@ -315,7 +315,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, lt
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
   %rs = sext i16 %r to i32
   %c = icmp slt i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -327,7 +327,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vminv.s32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
   %c = icmp slt i32 %r, %s2
   %s = select i1 %c, i32 %r, i32 %s2
   ret i32 %s
@@ -338,7 +338,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vminv.u8 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
   %c = icmp ult i8 %r, %s2
   %s = select i1 %c, i8 %r, i8 %s2
   ret i8 %s
@@ -353,7 +353,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, lo
 ; CHECK-NEXT:    bx lr
-  %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1)
+  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
   %rs = zext i8 %r to i32
   %c = icmp ult i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -365,7 +365,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vminv.u16 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
   %c = icmp ult i16 %r, %s2
   %s = select i1 %c, i16 %r, i16 %s2
   ret i16 %s
@@ -380,7 +380,7 @@
 ; CHECK-NEXT:    cmp r1, r0
 ; CHECK-NEXT:    csel r0, r1, r0, lo
 ; CHECK-NEXT:    bx lr
-  %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1)
+  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
   %rs = zext i16 %r to i32
   %c = icmp ult i32 %rs, %s2
   %s = select i1 %c, i32 %rs, i32 %s2
@@ -392,7 +392,7 @@
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vminv.u32 r0, q0
 ; CHECK-NEXT:    bx lr
-  %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1)
+  %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
   %c = icmp ult i32 %r, %s2
   %s = select i1 %c, i32 %r, i32 %s2
   ret i32 %s
diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll
index 2ea26b4..496d236 100644
--- a/llvm/test/CodeGen/X86/haddsub.ll
+++ b/llvm/test/CodeGen/X86/haddsub.ll
@@ -1628,8 +1628,8 @@
 ; Repeat tests from general reductions to verify output for hoppy targets:
 ; PR38971: https://bugs.llvm.org/show_bug.cgi?id=38971
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
+declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
 
 define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) {
 ; SSE3-SLOW-LABEL: fadd_reduce_v8f32:
@@ -1672,7 +1672,7 @@
 ; AVX-FAST-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vzeroupper
 ; AVX-FAST-NEXT:    retq
-  %r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a0, <8 x float> %a1)
+  %r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %r
 }
 
@@ -1711,7 +1711,7 @@
 ; AVX-FAST-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vzeroupper
 ; AVX-FAST-NEXT:    retq
-  %r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %a0, <4 x double> %a1)
+  %r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %r
 }
 
diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll
index 36b2de0..36bbdd4 100644
--- a/llvm/test/CodeGen/X86/pr45378.ll
+++ b/llvm/test/CodeGen/X86/pr45378.ll
@@ -6,7 +6,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW
 
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
 
 define i1 @parseHeaders(i64 * %ptr) nounwind {
 ; SSE2-LABEL: parseHeaders:
@@ -34,7 +34,7 @@
 ; AVX-NEXT:    retq
   %vptr = bitcast i64 * %ptr to <2 x i64> *
   %vload = load <2 x i64>, <2 x i64> * %vptr, align 8
-  %vreduce = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vload)
+  %vreduce = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vload)
   %vcheck = icmp eq i64 %vreduce, 0
   ret i1 %vcheck
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll
index 92f97d0469..3ddaf62 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll
@@ -32,7 +32,7 @@
 ; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -74,7 +74,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -124,7 +124,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -187,7 +187,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -229,7 +229,7 @@
 ; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -276,7 +276,7 @@
 ; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -336,7 +336,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -408,7 +408,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -499,7 +499,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -547,7 +547,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -601,7 +601,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -663,7 +663,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -738,7 +738,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -826,7 +826,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -933,7 +933,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -966,7 +966,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1002,7 +1002,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
   %a0 = load <2 x i8>, <2 x i8>* %p
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1043,7 +1043,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1075,7 +1075,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
   %a0 = load <4 x i8>, <4 x i8>* %p
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1103,7 +1103,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1135,7 +1135,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
   %a0 = load <8 x i8>, <8 x i8>* %p
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1169,7 +1169,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -1223,7 +1223,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -1285,7 +1285,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -1360,32 +1360,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 3c5e46e..e6b8956 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -59,7 +59,7 @@
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <2 x i64> %0 to <2 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -111,7 +111,7 @@
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <4 x i32> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -176,7 +176,7 @@
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i8> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -205,7 +205,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    retq
   %a = trunc <16 x i8> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -262,7 +262,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <4 x i64> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -351,7 +351,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i32> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -420,7 +420,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <16 x i16> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -492,7 +492,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <32 x i8> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -597,7 +597,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i64> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -678,7 +678,7 @@
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %a = trunc <16 x i32> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -765,7 +765,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <32 x i16> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -845,7 +845,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <64 x i8> %0 to <64 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
@@ -905,7 +905,7 @@
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <2 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -961,7 +961,7 @@
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <4 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -1014,7 +1014,7 @@
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1062,7 +1062,7 @@
 ; AVX512VL-NEXT:    setb %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1141,7 +1141,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <4 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -1207,7 +1207,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1274,7 +1274,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1354,7 +1354,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <32 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1447,7 +1447,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1507,7 +1507,7 @@
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %a = icmp eq <16 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1595,7 +1595,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <32 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1686,13 +1686,13 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <64 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
-declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>)
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
+declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
+declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
index 0df3238..64066be 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
@@ -28,7 +28,7 @@
 ; AVX-NEXT:    testq %rax, %rax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a0)
   %2 = icmp eq i64 %1, 0
   ret i1 %2
 }
@@ -79,7 +79,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %a0)
   %2 = icmp ne i64 %1, 0
   ret i1 %2
 }
@@ -136,7 +136,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a0)
   %2 = icmp eq i64 %1, 0
   ret i1 %2
 }
@@ -202,7 +202,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %a0)
   %2 = icmp ne i64 %1, 0
   ret i1 %2
 }
@@ -229,7 +229,7 @@
 ; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -256,7 +256,7 @@
 ; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a0)
   %2 = icmp ne i32 %1, 0
   ret i1 %2
 }
@@ -315,7 +315,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -380,7 +380,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a0)
   %2 = icmp ne i32 %1, 0
   ret i1 %2
 }
@@ -454,7 +454,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -482,7 +482,7 @@
 ; AVX-NEXT:    testw %ax, %ax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -510,7 +510,7 @@
 ; AVX-NEXT:    testw %ax, %ax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -542,7 +542,7 @@
 ; AVX-NEXT:    testw %ax, %ax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -610,7 +610,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -684,7 +684,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -767,7 +767,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -795,7 +795,7 @@
 ; AVX-NEXT:    testb %al, %al
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -824,7 +824,7 @@
 ; AVX-NEXT:    testb %al, %al
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -857,7 +857,7 @@
 ; AVX-NEXT:    testb %al, %al
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -894,7 +894,7 @@
 ; AVX-NEXT:    testb %al, %al
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -971,7 +971,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -1054,7 +1054,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -1146,33 +1146,33 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
 
-declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll
index 9545d29..8f7b02f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll
@@ -24,7 +24,7 @@
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -66,7 +66,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -114,7 +114,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -171,7 +171,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -193,7 +193,7 @@
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -215,7 +215,7 @@
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -265,7 +265,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -321,7 +321,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -386,7 +386,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -411,7 +411,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -436,7 +436,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -465,7 +465,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -528,7 +528,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -597,7 +597,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -675,7 +675,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -700,7 +700,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -726,7 +726,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -756,7 +756,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -790,7 +790,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -862,7 +862,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -940,7 +940,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -1027,32 +1027,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
index e98833e..ab91398 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
@@ -53,7 +53,7 @@
 ; AVX512-NEXT:    vaddss %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }
 
@@ -112,7 +112,7 @@
 ; AVX512-NEXT:    vaddss %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }
 
@@ -185,7 +185,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }
 
@@ -268,7 +268,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }
 
@@ -313,7 +313,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -367,7 +367,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -435,7 +435,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -513,7 +513,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -558,7 +558,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -612,7 +612,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -680,7 +680,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -758,7 +758,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -801,7 +801,7 @@
 ; AVX512-NEXT:    vaddsd %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }
 
@@ -853,7 +853,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }
 
@@ -912,7 +912,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }
 
@@ -982,7 +982,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }
 
@@ -1021,7 +1021,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -1069,7 +1069,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -1124,7 +1124,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -1189,7 +1189,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
   ret double %1
 }
 
@@ -1228,7 +1228,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -1276,7 +1276,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -1331,7 +1331,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -1396,16 +1396,16 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
index 7de6a25..ebcf1d3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
@@ -39,7 +39,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }
 
@@ -90,7 +90,7 @@
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }
 
@@ -176,7 +176,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }
 
@@ -327,7 +327,7 @@
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }
 
@@ -367,7 +367,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -422,7 +422,7 @@
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -512,7 +512,7 @@
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -667,7 +667,7 @@
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -699,7 +699,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float undef, <2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0)
   ret float %1
 }
 
@@ -746,7 +746,7 @@
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0)
   ret float %1
 }
 
@@ -828,7 +828,7 @@
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float undef, <8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0)
   ret float %1
 }
 
@@ -975,7 +975,7 @@
 ; AVX512-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float undef, <16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0)
   ret float %1
 }
 
@@ -1004,7 +1004,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }
 
@@ -1042,7 +1042,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }
 
@@ -1101,7 +1101,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }
 
@@ -1229,7 +1229,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }
 
@@ -1261,7 +1261,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -1302,7 +1302,7 @@
 ; AVX512-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -1364,7 +1364,7 @@
 ; AVX512-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -1467,7 +1467,7 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
   ret double %1
 }
 
@@ -1493,7 +1493,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double undef, <2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0)
   ret double %1
 }
 
@@ -1528,7 +1528,7 @@
 ; AVX512-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double undef, <4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0)
   ret double %1
 }
 
@@ -1584,7 +1584,7 @@
 ; AVX512-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double undef, <8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0)
   ret double %1
 }
 
@@ -1681,16 +1681,16 @@
 ; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double undef, <16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
 
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
index 50b88c2..01b41c7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
@@ -39,7 +39,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
   ret float %1
 }
 
@@ -78,7 +78,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a0)
   ret float %1
 }
 
@@ -125,7 +125,7 @@
 ; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
   ret float %1
 }
 
@@ -179,7 +179,7 @@
 ; AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a0)
   ret float %1
 }
 
@@ -206,7 +206,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
   ret double %1
 }
 
@@ -236,7 +236,7 @@
 ; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a0)
   ret double %1
 }
 
@@ -271,7 +271,7 @@
 ; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
   ret double %1
 }
 
@@ -313,16 +313,16 @@
 ; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index f4539c5..091990a 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -35,7 +35,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a0)
   ret float %1
 }
 
@@ -84,7 +84,7 @@
 ; AVX512-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a0)
   ret float %1
 }
 
@@ -155,7 +155,7 @@
 ; AVX512-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a0)
   ret float %1
 }
 
@@ -247,7 +247,7 @@
 ; AVX512-NEXT:    vmaxss %xmm8, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a0)
   ret float %1
 }
 
@@ -274,7 +274,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a0)
   ret double %1
 }
 
@@ -316,7 +316,7 @@
 ; AVX512-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmax.v3f64(<3 x double> %a0)
   ret double %1
 }
 
@@ -350,7 +350,7 @@
 ; AVX512-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a0)
   ret double %1
 }
 
@@ -395,7 +395,7 @@
 ; AVX512-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a0)
   ret double %1
 }
 
@@ -447,7 +447,7 @@
 ; AVX512-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a0)
   ret double %1
 }
 
@@ -511,18 +511,18 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call nnan half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half> %a0)
+  %1 = call nnan half @llvm.vector.reduce.fmax.v2f16(<2 x half> %a0)
   ret half %1
 }
-declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v3f64(<3 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
 
-declare half @llvm.experimental.vector.reduce.fmax.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
index c5e025b..af21b73 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
@@ -14,7 +14,7 @@
 ; ALL-LABEL: test_v1f32:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a0)
   ret float %1
 }
 
@@ -62,7 +62,7 @@
 ; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a0)
   ret float %1
 }
 
@@ -133,7 +133,7 @@
 ; AVX512-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a0)
   ret float %1
 }
 
@@ -230,7 +230,7 @@
 ; AVX512-NEXT:    vmaxss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a0)
   ret float %1
 }
 
@@ -401,7 +401,7 @@
 ; AVX512VL-NEXT:    vmovss %xmm8, %xmm0, %xmm0 {%k1}
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a0)
   ret float %1
 }
 
@@ -661,7 +661,7 @@
 ; AVX512VL-NEXT:    vmovss %xmm8, %xmm0, %xmm0 {%k1}
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a0)
   ret float %1
 }
 
@@ -700,7 +700,7 @@
 ; AVX512-NEXT:    vmovsd %xmm2, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a0)
   ret double %1
 }
 
@@ -774,7 +774,7 @@
 ; AVX512-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a0)
   ret double %1
 }
 
@@ -922,7 +922,7 @@
 ; AVX512VL-NEXT:    vmovsd %xmm8, %xmm0, %xmm0 {%k1}
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a0)
   ret double %1
 }
 
@@ -1091,18 +1091,18 @@
 ; AVX512-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmax.v1f32(<1 x float>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v3f32(<3 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 5846f58..de7c67c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -14,7 +14,7 @@
 ; ALL-LABEL: test_v1f32:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0)
   ret float %1
 }
 
@@ -43,7 +43,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
   ret float %1
 }
 
@@ -84,7 +84,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT:    vminss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
   ret float %1
 }
 
@@ -133,7 +133,7 @@
 ; AVX512-NEXT:    vminss %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
   ret float %1
 }
 
@@ -204,7 +204,7 @@
 ; AVX512-NEXT:    vminss %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
   ret float %1
 }
 
@@ -296,7 +296,7 @@
 ; AVX512-NEXT:    vminss %xmm8, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0)
+  %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
   ret float %1
 }
 
@@ -323,7 +323,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
   ret double %1
 }
 
@@ -357,7 +357,7 @@
 ; AVX512-NEXT:    vminsd %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
   ret double %1
 }
 
@@ -402,7 +402,7 @@
 ; AVX512-NEXT:    vminsd %xmm2, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
   ret double %1
 }
 
@@ -454,7 +454,7 @@
 ; AVX512-NEXT:    vminsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0)
+  %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
   ret double %1
 }
 
@@ -518,20 +518,20 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call nnan half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half> %a0)
+  %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0)
   ret half %1
 }
 
-declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
 
-declare half @llvm.experimental.vector.reduce.fmin.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
index 1d7436e..10e5842 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
@@ -54,7 +54,7 @@
 ; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
   ret float %1
 }
 
@@ -151,7 +151,7 @@
 ; AVX512-NEXT:    vminss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
   ret float %1
 }
 
@@ -322,7 +322,7 @@
 ; AVX512VL-NEXT:    vmovss %xmm8, %xmm0, %xmm0 {%k1}
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
   ret float %1
 }
 
@@ -582,7 +582,7 @@
 ; AVX512VL-NEXT:    vmovss %xmm8, %xmm0, %xmm0 {%k1}
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
   ret float %1
 }
 
@@ -621,7 +621,7 @@
 ; AVX512-NEXT:    vmovsd %xmm2, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
   ret double %1
 }
 
@@ -691,7 +691,7 @@
 ; AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmin.v3f64(<3 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmin.v3f64(<3 x double> %a0)
   ret double %1
 }
 
@@ -765,7 +765,7 @@
 ; AVX512-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
   ret double %1
 }
 
@@ -913,7 +913,7 @@
 ; AVX512VL-NEXT:    vmovsd %xmm8, %xmm0, %xmm0 {%k1}
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
   ret double %1
 }
 
@@ -1082,17 +1082,17 @@
 ; AVX512-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)
 
-declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v3f64(<3 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v3f64(<3 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
index e99946e..7840ae8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
@@ -39,7 +39,7 @@
 ; AVX512-NEXT:    vmulss %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }
 
@@ -82,7 +82,7 @@
 ; AVX512-NEXT:    vmulss %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }
 
@@ -133,7 +133,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }
 
@@ -191,7 +191,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }
 
@@ -225,7 +225,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -265,7 +265,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -313,7 +313,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -368,7 +368,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -402,7 +402,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -442,7 +442,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -490,7 +490,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -545,7 +545,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
+  %1 = call fast float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -575,7 +575,7 @@
 ; AVX512-NEXT:    vmulsd %xmm2, %xmm1, %xmm1
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }
 
@@ -608,7 +608,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }
 
@@ -646,7 +646,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }
 
@@ -691,7 +691,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }
 
@@ -719,7 +719,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -750,7 +750,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -786,7 +786,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -828,7 +828,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
   ret double %1
 }
 
@@ -856,7 +856,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -887,7 +887,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -923,7 +923,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -965,16 +965,16 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
+  %1 = call fast double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
 
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
index 7a1c044..3693ef0 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
@@ -38,7 +38,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }
 
@@ -89,7 +89,7 @@
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }
 
@@ -175,7 +175,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }
 
@@ -326,7 +326,7 @@
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }
 
@@ -360,7 +360,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
   ret float %1
 }
 
@@ -407,7 +407,7 @@
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
   ret float %1
 }
 
@@ -489,7 +489,7 @@
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
   ret float %1
 }
 
@@ -636,7 +636,7 @@
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
   ret float %1
 }
 
@@ -668,7 +668,7 @@
 ; AVX512-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float undef, <2 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float undef, <2 x float> %a0)
   ret float %1
 }
 
@@ -715,7 +715,7 @@
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %a0)
   ret float %1
 }
 
@@ -797,7 +797,7 @@
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float undef, <8 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float undef, <8 x float> %a0)
   ret float %1
 }
 
@@ -944,7 +944,7 @@
 ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float undef, <16 x float> %a0)
+  %1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float undef, <16 x float> %a0)
   ret float %1
 }
 
@@ -973,7 +973,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }
 
@@ -1011,7 +1011,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }
 
@@ -1070,7 +1070,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }
 
@@ -1198,7 +1198,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }
 
@@ -1226,7 +1226,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
   ret double %1
 }
 
@@ -1263,7 +1263,7 @@
 ; AVX512-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
   ret double %1
 }
 
@@ -1321,7 +1321,7 @@
 ; AVX512-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
   ret double %1
 }
 
@@ -1419,7 +1419,7 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
   ret double %1
 }
 
@@ -1445,7 +1445,7 @@
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double undef, <2 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double undef, <2 x double> %a0)
   ret double %1
 }
 
@@ -1480,7 +1480,7 @@
 ; AVX512-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double undef, <4 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double undef, <4 x double> %a0)
   ret double %1
 }
 
@@ -1536,7 +1536,7 @@
 ; AVX512-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double undef, <8 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double undef, <8 x double> %a0)
   ret double %1
 }
 
@@ -1633,16 +1633,16 @@
 ; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double undef, <16 x double> %a0)
+  %1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double undef, <16 x double> %a0)
   ret double %1
 }
 
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
 
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v16f64(double, <16 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 17671c6..d6cac39 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -85,7 +85,7 @@
 ; AVX512DQVL-NEXT:    vpmullq %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -231,7 +231,7 @@
 ; AVX512DQVL-NEXT:    vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -443,7 +443,7 @@
 ; AVX512DQVL-NEXT:    vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -763,7 +763,7 @@
 ; AVX512DQVL-NEXT:    vmovq %xmm0, %rax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -799,7 +799,7 @@
 ; AVX512-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -841,7 +841,7 @@
 ; AVX512-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -905,7 +905,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -983,7 +983,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1086,7 +1086,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1119,7 +1119,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1154,7 +1154,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1195,7 +1195,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1258,7 +1258,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1380,7 +1380,7 @@
 ; AVX512DQVL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1522,7 +1522,7 @@
 ; AVX512DQVL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1555,7 +1555,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1607,7 +1607,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1668,7 +1668,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1842,7 +1842,7 @@
 ; AVX512DQVL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -2051,7 +2051,7 @@
 ; AVX512DQVL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -2325,7 +2325,7 @@
 ; AVX512DQVL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2685,32 +2685,32 @@
 ; AVX512DQVL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512DQVL-NEXT:    vzeroupper
 ; AVX512DQVL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.mul.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.mul.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.mul.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.mul.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index a497c83..a5458d5 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -57,7 +57,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <2 x i64> %0 to <2 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -107,7 +107,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <4 x i32> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -169,7 +169,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i8> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -198,7 +198,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    retq
   %a = trunc <16 x i8> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -253,7 +253,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <4 x i64> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -338,7 +338,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i32> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -407,7 +407,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <16 x i16> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -479,7 +479,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <32 x i8> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -580,7 +580,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i64> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -661,7 +661,7 @@
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %a = trunc <16 x i32> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -748,7 +748,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <32 x i16> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -828,7 +828,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <64 x i8> %0 to <64 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
@@ -894,7 +894,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <2 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -945,7 +945,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <4 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -998,7 +998,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1046,7 +1046,7 @@
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1129,7 +1129,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <4 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -1197,7 +1197,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1264,7 +1264,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1341,7 +1341,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <32 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1441,7 +1441,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1499,7 +1499,7 @@
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %a = icmp eq <16 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1587,7 +1587,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <32 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1680,13 +1680,13 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <64 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
-declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
+declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
+declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index e125397..9cc38d8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -31,7 +31,7 @@
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
   %2 = icmp eq i64 %1, 0
   ret i1 %2
 }
@@ -60,7 +60,7 @@
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
   %2 = icmp ne i64 %1, 0
   ret i1 %2
 }
@@ -111,7 +111,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a0)
   %2 = icmp eq i64 %1, 0
   ret i1 %2
 }
@@ -175,7 +175,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a0)
   %2 = icmp ne i64 %1, 0
   ret i1 %2
 }
@@ -198,7 +198,7 @@
 ; AVX-NEXT:    testq %rax, %rax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -224,7 +224,7 @@
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a0)
   %2 = icmp ne i32 %1, 0
   ret i1 %2
 }
@@ -253,7 +253,7 @@
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -304,7 +304,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a0)
   %2 = icmp ne i32 %1, 0
   ret i1 %2
 }
@@ -368,7 +368,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %a0)
   %2 = icmp eq i32 %1, 0
   ret i1 %2
 }
@@ -391,7 +391,7 @@
 ; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -410,7 +410,7 @@
 ; AVX-NEXT:    testq %rax, %rax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -436,7 +436,7 @@
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -465,7 +465,7 @@
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -516,7 +516,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %a0)
   %2 = icmp eq i16 %1, 0
   ret i1 %2
 }
@@ -580,7 +580,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %a0)
   %2 = icmp ne i16 %1, 0
   ret i1 %2
 }
@@ -603,7 +603,7 @@
 ; AVX-NEXT:    testw %ax, %ax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -622,7 +622,7 @@
 ; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -641,7 +641,7 @@
 ; AVX-NEXT:    testq %rax, %rax
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -667,7 +667,7 @@
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -696,7 +696,7 @@
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -747,7 +747,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %a0)
   %2 = icmp ne i8 %1, 0
   ret i1 %2
 }
@@ -811,7 +811,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
   %2 = icmp eq i8 %1, 0
   ret i1 %2
 }
@@ -841,7 +841,7 @@
 ; AVX-NEXT:    vptest {{.*}}(%rip), %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
   %2 = trunc i64 %1 to i16
   %3 = icmp eq i16 %2, 0
   ret i1 %3
@@ -888,7 +888,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   %2 = and i32 %1, 2147483648
   %3 = icmp eq i32 %2, 0
   ret i1 %3
@@ -935,7 +935,7 @@
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
   %2 = trunc i16 %1 to i8
   %3 = icmp ne i8 %2, 0
   ret i1 %3
@@ -1003,7 +1003,7 @@
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
   %2 = and i8 %1, 1
   %3 = icmp eq i8 %2, 0
   ret i1 %3
@@ -1037,34 +1037,34 @@
 ; AVX-NEXT:    retq
   %2 = bitcast %struct.Box* %0 to <4 x i32>*
   %3 = load <4 x i32>, <4 x i32>* %2, align 4
-  %4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %3)
+  %4 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %3)
   %5 = and i32 %4, 15
   %6 = icmp eq i32 %5, 0
   ret i1 %6
 }
 
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll
index 35193e9..63c93f3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll
@@ -24,7 +24,7 @@
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -66,7 +66,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -114,7 +114,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -171,7 +171,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -193,7 +193,7 @@
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -215,7 +215,7 @@
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -265,7 +265,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -321,7 +321,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -386,7 +386,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -411,7 +411,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -436,7 +436,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -465,7 +465,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -528,7 +528,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -597,7 +597,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -675,7 +675,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -700,7 +700,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -726,7 +726,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -756,7 +756,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -790,7 +790,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -862,7 +862,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -940,7 +940,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -1027,32 +1027,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 89354e6..d4c5492 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -83,7 +83,7 @@
 ; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -209,7 +209,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -404,7 +404,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -731,7 +731,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -771,7 +771,7 @@
 ; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -819,7 +819,7 @@
 ; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -891,7 +891,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -981,7 +981,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1104,7 +1104,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1137,7 +1137,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1172,7 +1172,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1216,7 +1216,7 @@
 ; AVX512-NEXT:    xorl $32767, %eax # imm = 0x7FFF
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1280,7 +1280,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1354,7 +1354,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1445,7 +1445,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1491,7 +1491,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1547,7 +1547,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1615,7 +1615,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1685,7 +1685,7 @@
 ; AVX512-NEXT:    xorb $127, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -1781,7 +1781,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -1895,7 +1895,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2042,32 +2042,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 106888b..9543db9 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -83,7 +83,7 @@
 ; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -209,7 +209,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -404,7 +404,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -731,7 +731,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -771,7 +771,7 @@
 ; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -819,7 +819,7 @@
 ; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -891,7 +891,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -981,7 +981,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1104,7 +1104,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1137,7 +1137,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1172,7 +1172,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1216,7 +1216,7 @@
 ; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1280,7 +1280,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1354,7 +1354,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1445,7 +1445,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1491,7 +1491,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1547,7 +1547,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1615,7 +1615,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1685,7 +1685,7 @@
 ; AVX512-NEXT:    xorb $-128, %al
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -1781,7 +1781,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -1895,7 +1895,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2042,32 +2042,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index aedde4d..27bf159 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -89,7 +89,7 @@
 ; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -231,7 +231,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -453,7 +453,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -832,7 +832,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -875,7 +875,7 @@
 ; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -929,7 +929,7 @@
 ; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -1010,7 +1010,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -1115,7 +1115,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1265,7 +1265,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1311,7 +1311,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1361,7 +1361,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1422,7 +1422,7 @@
 ; AVX512VL-NEXT:    notl %eax
 ; AVX512VL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512VL-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1505,7 +1505,7 @@
 ; AVX512VL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1602,7 +1602,7 @@
 ; AVX512VL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1721,7 +1721,7 @@
 ; AVX512VL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1754,7 +1754,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1790,7 +1790,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1832,7 +1832,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1901,7 +1901,7 @@
 ; AVX512VL-NEXT:    notb %al
 ; AVX512VL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512VL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -1994,7 +1994,7 @@
 ; AVX512VL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -2099,7 +2099,7 @@
 ; AVX512VL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2222,32 +2222,32 @@
 ; AVX512VL-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index c8195d2..dee8970 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -89,7 +89,7 @@
 ; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -232,7 +232,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -456,7 +456,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -836,7 +836,7 @@
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -879,7 +879,7 @@
 ; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -933,7 +933,7 @@
 ; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -1014,7 +1014,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -1119,7 +1119,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -1269,7 +1269,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -1315,7 +1315,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -1365,7 +1365,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -1407,7 +1407,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -1467,7 +1467,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -1539,7 +1539,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -1632,7 +1632,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -1665,7 +1665,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -1701,7 +1701,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -1743,7 +1743,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -1791,7 +1791,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -1859,7 +1859,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -1937,7 +1937,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -2032,32 +2032,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 5b0109b..efb45ee 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -57,7 +57,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <2 x i64> %0 to <2 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -107,7 +107,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <4 x i32> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -172,7 +172,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i8> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -201,7 +201,7 @@
 ; AVX512-NEXT:    setnp %al
 ; AVX512-NEXT:    retq
   %a = trunc <16 x i8> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -256,7 +256,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <4 x i64> %0 to <4 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -345,7 +345,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i32> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -423,7 +423,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <16 x i16> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -504,7 +504,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <32 x i8> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -609,7 +609,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <8 x i64> %0 to <8 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -717,7 +717,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <16 x i32> %0 to <16 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -815,7 +815,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <32 x i16> %0 to <32 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -912,7 +912,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = trunc <64 x i8> %0 to <64 x i1>
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
@@ -978,7 +978,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <2 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
   ret i1 %b
 }
 
@@ -1029,7 +1029,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <4 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -1082,7 +1082,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1136,7 +1136,7 @@
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1219,7 +1219,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <4 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
   ret i1 %b
 }
 
@@ -1288,7 +1288,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1366,7 +1366,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1454,7 +1454,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <32 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1557,7 +1557,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <8 x i64> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
 
@@ -1643,7 +1643,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <16 x i32> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
   ret i1 %b
 }
 
@@ -1743,7 +1743,7 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <32 x i16> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
   ret i1 %b
 }
 
@@ -1853,13 +1853,13 @@
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
   %a = icmp eq <64 x i8> %0, zeroinitializer
-  %b = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> %a)
+  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
   ret i1 %b
 }
 
-declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>)
+declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
+declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
+declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
+declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
index 4fd8489..b1306c7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
@@ -24,7 +24,7 @@
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
 
@@ -66,7 +66,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %a0)
   ret i64 %1
 }
 
@@ -114,7 +114,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
 
@@ -171,7 +171,7 @@
 ; AVX512-NEXT:    vmovq %xmm0, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> %a0)
+  %1 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
 
@@ -193,7 +193,7 @@
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
 
@@ -215,7 +215,7 @@
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
 
@@ -265,7 +265,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a0)
   ret i32 %1
 }
 
@@ -321,7 +321,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %a0)
   ret i32 %1
 }
 
@@ -386,7 +386,7 @@
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> %a0)
+  %1 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %a0)
   ret i32 %1
 }
 
@@ -411,7 +411,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
 
@@ -436,7 +436,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
 
@@ -465,7 +465,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
 
@@ -528,7 +528,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %a0)
   ret i16 %1
 }
 
@@ -597,7 +597,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %a0)
   ret i16 %1
 }
 
@@ -675,7 +675,7 @@
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> %a0)
+  %1 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %a0)
   ret i16 %1
 }
 
@@ -700,7 +700,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
 
@@ -726,7 +726,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
 
@@ -756,7 +756,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
 
@@ -790,7 +790,7 @@
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
 
@@ -862,7 +862,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a0)
   ret i8 %1
 }
 
@@ -940,7 +940,7 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %a0)
   ret i8 %1
 }
 
@@ -1027,32 +1027,32 @@
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
-  %1 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> %a0)
+  %1 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %a0)
   ret i8 %1
 }
 
-declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)
 
-declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>)
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)
 
-declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>)
+declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)
 
-declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>)
+declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)