[ARM,AArch64] Fix __rev16l and __rev16ll intrinsics

These two intrinsics are defined in arm_acle.h.

__rev16l needs to rotate by 16 bits, bit it was actually rotating by 2 bits.
For AArch64, where long is 64 bits, this would still be wrong.

__rev16ll was incorrect, it reversed the bytes in each 32-bit word, rather than
each 16-bit halfword. The correct implementation is to apply __rev16 to the top
and bottom words of the 64-bit value.

For AArch32 targets, these get compiled down to the hardware rev16 instruction
at -O1 and above. For AArch64 targets, the 64-bit ones get compiled to two
32-bit rev16 instructions, because there is not currently a pattern for the
64-bit rev16 instruction.

Differential Revision: http://reviews.llvm.org/D14609

llvm-svn: 253211
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 73a7e76..4be1d09 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -175,14 +175,18 @@
   return __ror(__rev(t), 16);
 }
 
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
-  __rev16l(unsigned long t) {
-    return __rorl(__revl(t), sizeof(long) / 2);
-}
-
 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
   __rev16ll(uint64_t t) {
-  return __rorll(__revll(t), 32);
+  return (((uint64_t)__rev16(t >> 32)) << 32) | __rev16(t);
+}
+
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
+  __rev16l(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+    return __rev16(t);
+#else
+    return __rev16ll(t);
+#endif
 }
 
 /* REVSH */