rs_matrix types are not 16-byte aligned, so we have to load them as align 4.

Change-Id: I30742a23fe87db7cb68d2c97bc022f7ee418ef82

commit: 65cd36f9052a317bab961fd86415e4faa55d00d0 [log] [tgz]
author: Stephen Hines <srhines@google.com> Tue Jan 15 15:05:52 2013 -0800
committer: Stephen Hines <srhines@google.com> Tue Jan 15 15:07:15 2013 -0800
tree: 161b476e3918e301f5012cc3072c1acc5034f527
parent: 426db95e17cdbaa9b1ff26eb32d20374681a96c2 [diff]
diff --git a/lib/Renderscript/runtime/matrix.ll b/lib/Renderscript/runtime/matrix.ll
index e559d99..c56405d 100644
--- a/lib/Renderscript/runtime/matrix.ll
+++ b/lib/Renderscript/runtime/matrix.ll

@@ -25,13 +25,13 @@
 
   %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
   %px2 = bitcast float* %px to <4 x float>*
-  %xm = load <4 x float>* %px2
+  %xm = load <4 x float>* %px2, align 4
   %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
   %py2 = bitcast float* %py to <4 x float>*
-  %ym = load <4 x float>* %py2
+  %ym = load <4 x float>* %py2, align 4
   %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 6
   %pz2 = bitcast float* %pz to <3 x float>*
-  %zm2 = load <3 x float>* %pz2
+  %zm2 = load <3 x float>* %pz2, align 4
   %zm = shufflevector <3 x float> %zm2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 
   %a1 = fmul <4 x float> %x, %xm
@@ -56,10 +56,10 @@
 
   %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
   %px2 = bitcast float* %px to <4 x float>*
-  %xm = load <4 x float>* %px2
+  %xm = load <4 x float>* %px2, align 4
   %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
   %py2 = bitcast float* %py to <4 x float>*
-  %ym = load <4 x float>* %py2
+  %ym = load <4 x float>* %py2, align 4
 
   %a1 = fmul <4 x float> %x, %xm
   %a2 = fmul <4 x float> %y, %ym
@@ -85,16 +85,16 @@
 
   %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
   %px2 = bitcast float* %px to <4 x float>*
-  %xm = load <4 x float>* %px2
+  %xm = load <4 x float>* %px2, align 4
   %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
   %py2 = bitcast float* %py to <4 x float>*
-  %ym = load <4 x float>* %py2
+  %ym = load <4 x float>* %py2, align 4
   %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
   %pz2 = bitcast float* %pz to <4 x float>*
-  %zm = load <4 x float>* %pz2
+  %zm = load <4 x float>* %pz2, align 4
   %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
   %pw2 = bitcast float* %pw to <4 x float>*
-  %wm = load <4 x float>* %pw2
+  %wm = load <4 x float>* %pw2, align 4
 
   %a1 = fmul <4 x float> %x, %xm
   %a2 = fmul <4 x float> %y, %ym
@@ -121,16 +121,16 @@
 
   %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
   %px2 = bitcast float* %px to <4 x float>*
-  %xm = load <4 x float>* %px2
+  %xm = load <4 x float>* %px2, align 4
   %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
   %py2 = bitcast float* %py to <4 x float>*
-  %ym = load <4 x float>* %py2
+  %ym = load <4 x float>* %py2, align 4
   %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
   %pz2 = bitcast float* %pz to <4 x float>*
-  %zm = load <4 x float>* %pz2
+  %zm = load <4 x float>* %pz2, align 4
   %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
   %pw2 = bitcast float* %pw to <4 x float>*
-  %wm = load <4 x float>* %pw2
+  %wm = load <4 x float>* %pw2, align 4
 
   %a1 = fmul <4 x float> %x, %xm
   %a2 = fadd <4 x float> %wm, %a1
@@ -154,13 +154,13 @@
 
   %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
   %px2 = bitcast float* %px to <4 x float>*
-  %xm = load <4 x float>* %px2
+  %xm = load <4 x float>* %px2, align 4
   %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
   %py2 = bitcast float* %py to <4 x float>*
-  %ym = load <4 x float>* %py2
+  %ym = load <4 x float>* %py2, align 4
   %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
   %pw2 = bitcast float* %pw to <4 x float>*
-  %wm = load <4 x float>* %pw2
+  %wm = load <4 x float>* %pw2, align 4
 
   %a1 = fmul <4 x float> %x, %xm
   %a2 = fadd <4 x float> %wm, %a1
commit	65cd36f9052a317bab961fd86415e4faa55d00d0	[log] [tgz]
author	Stephen Hines <srhines@google.com>	Tue Jan 15 15:05:52 2013 -0800
committer	Stephen Hines <srhines@google.com>	Tue Jan 15 15:07:15 2013 -0800
tree	161b476e3918e301f5012cc3072c1acc5034f527
parent	426db95e17cdbaa9b1ff26eb32d20374681a96c2 [diff]