Merge "LL version of rsClamp. Fix non-vector clamp." into jb-mr1-dev
diff --git a/lib/Renderscript/runtime/Android.mk b/lib/Renderscript/runtime/Android.mk
index 4227875..41f7a4d 100644
--- a/lib/Renderscript/runtime/Android.mk
+++ b/lib/Renderscript/runtime/Android.mk
@@ -29,7 +29,8 @@
convert.ll \
matrix.ll \
pixel_packing.ll \
- math.ll
+ math.ll \
+ rsClamp.ll
clcore_files := \
$(clcore_base_files) \
diff --git a/lib/Renderscript/runtime/arch/neon.ll b/lib/Renderscript/runtime/arch/neon.ll
index 0f49eb9..42b04c1 100644
--- a/lib/Renderscript/runtime/arch/neon.ll
+++ b/lib/Renderscript/runtime/arch/neon.ll
@@ -115,15 +115,12 @@
ret <2 x float> %b
}
-
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
- %_value = tail call <2 x float> @smear_2f(float %value) nounwind readnone
- %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
- %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
- %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %_value, <2 x float> %_high) nounwind readnone
- %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
- %c = extractelement <2 x float> %b, i32 0
- ret float %c
+ %1 = fcmp olt float %value, %high ; ordered '<': false if either operand is NaN
+ %2 = select i1 %1, float %value, float %high ; %value if %value < %high, else %high
+ %3 = fcmp ogt float %2, %low ; upper bound already applied; now test the lower bound
+ %4 = select i1 %3, float %2, float %low ; %2 if %2 > %low, else %low
+ ret float %4 ; scalar compare/select replaces the removed smear + NEON vmin/vmax + extract sequence
}
diff --git a/lib/Renderscript/runtime/rsClamp.ll b/lib/Renderscript/runtime/rsClamp.ll
new file mode 100644
index 0000000..eba678a
--- /dev/null
+++ b/lib/Renderscript/runtime/rsClamp.ll
@@ -0,0 +1,60 @@
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+
+define float @_Z7rsClampfff(float %value, float %low, float %high) nounwind readonly { ; rsClamp(float, float, float)
+  %below_high = fcmp olt float %value, %high                    ; ordered '<': false if either operand is NaN
+  %capped = select i1 %below_high, float %value, float %high    ; %value if %value < %high, else %high
+  %above_low = fcmp ogt float %capped, %low                     ; ordered '>': false if either operand is NaN
+  %clamped = select i1 %above_low, float %capped, float %low    ; %capped if %capped > %low, else %low
+  ret float %clamped                                            ; lower bound is applied last, so %low wins when %low > %high
+}
+
+define signext i8 @_Z7rsClampccc(i8 signext %value, i8 signext %low, i8 signext %high) nounwind readonly { ; rsClamp(char, char, char)
+  %below_high = icmp slt i8 %value, %high                 ; signed '<'
+  %capped = select i1 %below_high, i8 %value, i8 %high    ; %value if %value < %high, else %high
+  %above_low = icmp sgt i8 %capped, %low                  ; signed '>'
+  %clamped = select i1 %above_low, i8 %capped, i8 %low    ; %capped if %capped > %low, else %low
+  ret i8 %clamped                                         ; lower bound is applied last, so %low wins when %low > %high
+}
+
+define zeroext i8 @_Z7rsClamphhh(i8 zeroext %value, i8 zeroext %low, i8 zeroext %high) nounwind readonly { ; rsClamp(unsigned char x3)
+  %below_high = icmp ult i8 %value, %high                 ; unsigned '<'
+  %capped = select i1 %below_high, i8 %value, i8 %high    ; %value if %value < %high, else %high
+  %above_low = icmp ugt i8 %capped, %low                  ; unsigned '>'
+  %clamped = select i1 %above_low, i8 %capped, i8 %low    ; %capped if %capped > %low, else %low
+  ret i8 %clamped                                         ; lower bound is applied last, so %low wins when %low > %high
+}
+
+define signext i16 @_Z7rsClampsss(i16 signext %value, i16 signext %low, i16 signext %high) nounwind readonly { ; rsClamp(short, short, short)
+  %below_high = icmp slt i16 %value, %high                  ; signed '<'
+  %capped = select i1 %below_high, i16 %value, i16 %high    ; %value if %value < %high, else %high
+  %above_low = icmp sgt i16 %capped, %low                   ; signed '>'
+  %clamped = select i1 %above_low, i16 %capped, i16 %low    ; %capped if %capped > %low, else %low
+  ret i16 %clamped                                          ; lower bound is applied last, so %low wins when %low > %high
+}
+
+define zeroext i16 @_Z7rsClampttt(i16 zeroext %value, i16 zeroext %low, i16 zeroext %high) nounwind readonly { ; rsClamp(unsigned short x3)
+  %below_high = icmp ult i16 %value, %high                  ; unsigned '<'
+  %capped = select i1 %below_high, i16 %value, i16 %high    ; %value if %value < %high, else %high
+  %above_low = icmp ugt i16 %capped, %low                   ; unsigned '>'
+  %clamped = select i1 %above_low, i16 %capped, i16 %low    ; %capped if %capped > %low, else %low
+  ret i16 %clamped                                          ; lower bound is applied last, so %low wins when %low > %high
+}
+
+define i32 @_Z7rsClampiii(i32 %value, i32 %low, i32 %high) nounwind readonly { ; rsClamp(int, int, int)
+  %below_high = icmp slt i32 %value, %high                  ; signed '<'
+  %capped = select i1 %below_high, i32 %value, i32 %high    ; %value if %value < %high, else %high
+  %above_low = icmp sgt i32 %capped, %low                   ; signed '>'
+  %clamped = select i1 %above_low, i32 %capped, i32 %low    ; %capped if %capped > %low, else %low
+  ret i32 %clamped                                          ; lower bound is applied last, so %low wins when %low > %high
+}
+
+define i32 @_Z7rsClampjjj(i32 %value, i32 %low, i32 %high) nounwind readonly { ; rsClamp(unsigned int x3)
+  %below_high = icmp ult i32 %value, %high                  ; unsigned '<'
+  %capped = select i1 %below_high, i32 %value, i32 %high    ; %value if %value < %high, else %high
+  %above_low = icmp ugt i32 %capped, %low                   ; unsigned '>'
+  %clamped = select i1 %above_low, i32 %capped, i32 %low    ; %capped if %capped > %low, else %low
+  ret i32 %clamped                                          ; lower bound is applied last, so %low wins when %low > %high
+}
+
diff --git a/lib/Renderscript/runtime/rs_core.c b/lib/Renderscript/runtime/rs_core.c
index aaf1336..1f6bd5d 100644
--- a/lib/Renderscript/runtime/rs_core.c
+++ b/lib/Renderscript/runtime/rs_core.c
@@ -168,25 +168,3 @@
return rsMatrixMultiply((const rs_matrix2x2 *)m, in);
}
-/////////////////////////////////////////////////////
-// int ops
-/////////////////////////////////////////////////////
-
-extern uint __attribute__((overloadable, always_inline)) rsClamp(uint amount, uint low, uint high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-extern int __attribute__((overloadable, always_inline)) rsClamp(int amount, int low, int high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-extern ushort __attribute__((overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-extern uchar __attribute__((overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-extern char __attribute__((overloadable, always_inline)) rsClamp(char amount, char low, char high) {
- return amount < low ? low : (amount > high ? high : amount);
-}