Cleaned up debug variants of libclcore.bc
Bug: 38418449
Bug: 62028512
Bug: 37955136
Two major changes:
(More motivation, explanation, and discussion can be found in the
tagged bugs.)
1) Added a fourth variant of libclcore.bc, libclcore_debug_g.bc,
which is both compiled with -g -O0 and provides runtime range
checking. This is needed for debug-context-related CTS tests to pass
when they are compiled with the flags -g -O0.
2) Removed Element setters and getters defined under RS_G_RUNTIME.
The removed code is neither necessary nor correct. After the
RS_G_RUNTIME specific setter/getter implementation was removed,
a bug in the default setter/getter implementation showed up with -O0
compilation as described in Bug 38418449, which caused segfaults
due to mismatched argument types between callers and callees of
rsSetElementAtImpl_<T>() and __rsAllocationVStoreXImpl_<T>().
To fix it, I adjusted the argument types of rsSetElementAtImpl_<T>()
and __rsAllocationVStoreXImpl_<T>() defined in
frameworks/rs/driver/runtime/ll64/allocation.ll to match the
clang-generated code from frameworks/rs/driver/runtime/rs_allocation.c.
E.g., char4 was <4 x i8> in ll64/allocation.ll, but was i32 in code
generated by clang. This caused segfaults on calls to the affected
functions in the final arm64-v8a code. short2 and half2 had the same
issue and are fixed in the same way.
Test: CTS on Angler and X86_64:
Test: With tests compiled using -g -O0 and system property
debug.rs.debug set to 1;
Test: With tests compiled using -g -O0;
Test: With tests compiled using the default flags;
Test: With tests compiled using the default flags and system property
debug.rs.debug set to 1.
Test: LLDB tests on X86_64 with no failures other than those already
known to fail.
Change-Id: I23bd9ab6c7648d2762a77977f08ad3f20e31941c
diff --git a/Android.bp b/Android.bp
index 5835246..78992d8 100644
--- a/Android.bp
+++ b/Android.bp
@@ -183,6 +183,7 @@
required: [
"libclcore.bc",
"libclcore_debug.bc",
+ "libclcore_debug_g.bc",
"libclcore_g.bc",
"libcompiler_rt",
],
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 60d08be..cf1b869 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -447,6 +447,9 @@
// If we're debugging, use the debug library.
if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
+ if (ME.hasDebugInfo()) {
+ return SYSLIBPATH_BC"/libclcore_debug_g.bc";
+ }
return SYSLIBPATH_BC"/libclcore_debug.bc";
}
diff --git a/driver/runtime/Android.mk b/driver/runtime/Android.mk
index 8d7e5bf..71b5ab2 100755
--- a/driver/runtime/Android.mk
+++ b/driver/runtime/Android.mk
@@ -152,14 +152,38 @@
LOCAL_CFLAGS += $(clcore_cflags)
LOCAL_CFLAGS += -g -O0
LOCAL_SRC_FILES := $(clcore_base_files) $(clcore_g_files)
+LOCAL_SRC_FILES_32 := $(clcore_base_files_32)
+LOCAL_SRC_FILES_64 := $(clcore_base_files_64)
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),arm64))
LOCAL_CFLAGS_64 += -DARCH_ARM64_HAVE_NEON
endif
include $(LOCAL_PATH)/build_bc_lib.mk
-rs_g_runtime :=
+# Build a debug version of the library with debug info
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libclcore_debug_g.bc
+rs_debug_runtime := 1
+rs_g_runtime := 1
+LOCAL_CFLAGS += $(clcore_cflags)
+LOCAL_CFLAGS += -g -O0
+LOCAL_SRC_FILES := $(clcore_base_files)
+LOCAL_SRC_FILES += rs_abi_debuginfo.c
+LOCAL_SRC_FILES_32 := $(clcore_base_files_32)
+LOCAL_SRC_FILES_64 := $(clcore_base_files_64)
+
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),arm64))
+LOCAL_SRC_FILES_64 += arch/asimd.ll arch/clamp.c
+LOCAL_CFLAGS_64 += -DARCH_ARM64_HAVE_NEON
+else
+LOCAL_SRC_FILES_64 += arch/generic.c
+endif
+
+include $(LOCAL_PATH)/build_bc_lib.mk
+rs_debug_runtime :=
+rs_g_runtime :=
### Build new versions (librsrt_<ARCH>.bc) as host shared libraries.
### These will be used with bcc_compat and the support library.
diff --git a/driver/runtime/ll64/allocation.ll b/driver/runtime/ll64/allocation.ll
index 64b4a8f..94fa11b 100644
--- a/driver/runtime/ll64/allocation.ll
+++ b/driver/runtime/ll64/allocation.ll
@@ -82,10 +82,11 @@
}
!24 = !{!"char4", !15}
-define void @rsSetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @rsSetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x i8>*
- store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !24
+ %3 = bitcast i32 %val to <4 x i8>
+ store <4 x i8> %3, <4 x i8>* %2, align 4, !tbaa !24
ret void
}
@@ -144,10 +145,11 @@
}
!28 = !{!"uchar4", !15}
-define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x i8>*
- store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !28
+ %3 = bitcast i32 %val to <4 x i8>
+ store <4 x i8> %3, <4 x i8>* %2, align 4, !tbaa !28
ret void
}
@@ -174,10 +176,11 @@
}
!30 = !{!"short2", !15}
-define void @rsSetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @rsSetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <2 x i16>*
- store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !30
+ %3 = bitcast i32 %val to <2 x i16>
+ store <2 x i16> %3, <2 x i16>* %2, align 4, !tbaa !30
ret void
}
@@ -237,10 +240,11 @@
}
!34 = !{!"ushort2", !15}
-define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <2 x i16>*
- store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !34
+ %3 = bitcast i32 %val to <2 x i16>
+ store <2 x i16> %3, <2 x i16>* %2, align 4, !tbaa !34
ret void
}
@@ -685,10 +689,11 @@
}
!62 = !{!"half2", !15}
-define void @rsSetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, <2 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @rsSetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <2 x half>*
- store <2 x half> %val, <2 x half>* %2, align 4, !tbaa !62
+ %3 = bitcast i32 %val to <2 x half>
+ store <2 x half> %3, <2 x half>* %2, align 4, !tbaa !62
ret void
}
@@ -1026,10 +1031,11 @@
store <3 x i16> %4, <3 x i16>* %2, align 2
ret void
}
-define void @__rsAllocationVStoreXImpl_short2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @__rsAllocationVStoreXImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <2 x i16>*
- store <2 x i16> %val, <2 x i16>* %2, align 2
+ %3 = bitcast i32 %val to <2 x i16>
+ store <2 x i16> %3, <2 x i16>* %2, align 2
ret void
}
@@ -1047,17 +1053,19 @@
store <3 x i16> %4, <3 x i16>* %2, align 2
ret void
}
-define void @__rsAllocationVStoreXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @__rsAllocationVStoreXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <2 x i16>*
- store <2 x i16> %val, <2 x i16>* %2, align 2
+ %3 = bitcast i32 %val to <2 x i16>
+ store <2 x i16> %3, <2 x i16>* %2, align 2
ret void
}
-define void @__rsAllocationVStoreXImpl_char4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @__rsAllocationVStoreXImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x i8>*
- store <4 x i8> %val, <4 x i8>* %2, align 1
+ %3 = bitcast i32 %val to <4 x i8>
+ store <4 x i8> %3, <4 x i8>* %2, align 1
ret void
}
define void @__rsAllocationVStoreXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
@@ -1076,10 +1084,11 @@
ret void
}
-define void @__rsAllocationVStoreXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
+define void @__rsAllocationVStoreXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x i8>*
- store <4 x i8> %val, <4 x i8>* %2, align 1
+ %3 = bitcast i32 %val to <4 x i8>
+ store <4 x i8> %3, <4 x i8>* %2, align 1
ret void
}
define void @__rsAllocationVStoreXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
diff --git a/driver/runtime/rs_allocation.c b/driver/runtime/rs_allocation.c
index 2163e77..075b368 100644
--- a/driver/runtime/rs_allocation.c
+++ b/driver/runtime/rs_allocation.c
@@ -61,7 +61,6 @@
}
}
-#ifndef RS_DEBUG_RUNTIME
uint8_t*
rsOffset(rs_allocation a, uint32_t sizeOf, uint32_t x, uint32_t y,
uint32_t z) {
@@ -73,7 +72,6 @@
(z * stride * dimY)];
return dp;
}
-#endif
uint8_t*
rsOffsetNs(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
@@ -134,64 +132,6 @@
}
#else // NOT RS_DEBUG_RUNTIME
-// The functions rsSetElementAtImpl_T and rsGetElementAtImpl_T are implemented in bitcode
-// in ll32/allocation.ll and ll64/allocation.ll. To be able to provide debug info for
-// these functions define them here instead, if we are linking with the debug library.
-#ifdef RS_G_RUNTIME
-
-#define SET_ELEMENT_AT_IMPL_TYPE_SIZE(typename, size) \
- void rsSetElementAtImpl_##typename \
- (rs_allocation a, typename val, uint32_t x, uint32_t y, uint32_t z) { \
- typename* val_ptr = (typename*)rsOffset(a, size, x, y, z); \
- *val_ptr = val; \
- }
-
-#define GET_ELEMENT_AT_IMPL_TYPE_SIZE(typename, size) \
- typename rsGetElementAtImpl_##typename \
- (rs_allocation a, uint32_t x, uint32_t y, uint32_t z) { \
- typename *val_ptr = (typename*)rsOffset(a, size, x, y, z); \
- return *val_ptr; \
- }
-
-#define SET_ELEMENT_AT_IMPL_TYPE(typename) \
- SET_ELEMENT_AT_IMPL_TYPE_SIZE(typename, sizeof(typename)) \
- SET_ELEMENT_AT_IMPL_TYPE_SIZE(typename##2, sizeof(typename)*2) \
- SET_ELEMENT_AT_IMPL_TYPE_SIZE(typename##3, sizeof(typename)*4) \
- SET_ELEMENT_AT_IMPL_TYPE_SIZE(typename##4, sizeof(typename)*4)
-
-#define GET_ELEMENT_AT_IMPL_TYPE(typename) \
- GET_ELEMENT_AT_IMPL_TYPE_SIZE(typename, sizeof(typename)) \
- GET_ELEMENT_AT_IMPL_TYPE_SIZE(typename##2, sizeof(typename)*2) \
- GET_ELEMENT_AT_IMPL_TYPE_SIZE(typename##3, sizeof(typename)*4) \
- GET_ELEMENT_AT_IMPL_TYPE_SIZE(typename##4, sizeof(typename)*4)
-
-#define ELEMENT_AT_IMPL_TYPE(typename) \
- SET_ELEMENT_AT_IMPL_TYPE(typename) \
- GET_ELEMENT_AT_IMPL_TYPE(typename)
-
-ELEMENT_AT_IMPL_TYPE(char)
-ELEMENT_AT_IMPL_TYPE(uchar)
-ELEMENT_AT_IMPL_TYPE(short)
-ELEMENT_AT_IMPL_TYPE(ushort)
-ELEMENT_AT_IMPL_TYPE(int)
-ELEMENT_AT_IMPL_TYPE(uint)
-ELEMENT_AT_IMPL_TYPE(long)
-ELEMENT_AT_IMPL_TYPE(ulong)
-ELEMENT_AT_IMPL_TYPE(half)
-ELEMENT_AT_IMPL_TYPE(float)
-ELEMENT_AT_IMPL_TYPE(double)
-
-#undef ELEMENT_AT_IMPL_TYPE
-#undef GET_ELEMENT_AT_IMPL_TYPE
-#undef SET_ELEMENT_AT_IMPL_TYPE
-#undef GET_ELEMENT_AT_IMPL_TYPE_SIZE
-#undef SET_ELEMENT_AT_IMPL_TYPE_SIZE
-
-#define SET_ELEMENT_AT_TYPE_IMPL(T, typename) /* nothing */
-#define GET_ELEMENT_AT_TYPE_IMPL(T, typename) /* nothing */
-
-#else //NOT RS_G_RUNTIME
-
#define SET_ELEMENT_AT_TYPE_IMPL(T, typename) \
void \
rsSetElementAtImpl_##typename(rs_allocation a, typename val, uint32_t x, \
@@ -202,8 +142,6 @@
rsGetElementAtImpl_##typename(rs_allocation a, uint32_t x, uint32_t y, \
uint32_t z);
-#endif //RS_G_RUNTIME
-
#define SET_ELEMENT_AT_TYPE_DEF(T, typename) \
extern void __attribute__((overloadable)) \
rsSetElementAt_##typename(rs_allocation a, T val, uint32_t x) { \
@@ -402,33 +340,10 @@
return pin[((x >> shift) * cstep) + ((y >> shift) * stride)];
}
-// The functions rsAllocationVLoadXImpl_T and rsAllocationVStoreXImpl_T are implemented in
-// bitcode in ll32/allocation.ll and ll64/allocation.ll. To be able to provide debug info
-// for these functions define them here instead, if we are linking with the debug library.
-#ifdef RS_G_RUNTIME
-
-#define VOP_IMPL(T) \
- void __rsAllocationVStoreXImpl_##T \
- (rs_allocation a, const T val, uint32_t x, uint32_t y, uint32_t z) {\
- T *val_ptr = (T*)rsOffsetNs(a, x, y, z); \
- local_memcpy(val_ptr, &val, sizeof(T)); \
- } \
- T __rsAllocationVLoadXImpl_##T \
- (rs_allocation a, uint32_t x, uint32_t y, uint32_t z) { \
- T result = {}; \
- T* val_ptr = (T*)rsOffsetNs(a, x, y, z); \
- local_memcpy(&result, val_ptr, sizeof(T)); \
- return result; \
- }
-
-#else
-
#define VOP_IMPL(T) \
extern void __rsAllocationVStoreXImpl_##T(rs_allocation a, const T val, uint32_t x, uint32_t y, uint32_t z); \
extern T __rsAllocationVLoadXImpl_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
-#endif // RS_G_RUNTIME
-
#define VOP_DEF(T) \
extern void __attribute__((overloadable)) \
rsAllocationVStoreX_##T(rs_allocation a, T val, uint32_t x) { \