Merge pull request #89 from JaapKeuter/aes_risc_le

Introduce little-endian RISC support.  Thanks for the contribution.
diff --git a/configure b/configure
index 40396d9..49a05d0 100755
--- a/configure
+++ b/configure
@@ -4079,15 +4079,8 @@
 $as_echo "#define HAVE_X86 1" >>confdefs.h
 ;;
 	* )
-	# CPU_RISC is only supported for big endian machines.
-	if test "$ac_cv_c_bigendian" = "yes"; then
 
 $as_echo "#define CPU_RISC 1" >>confdefs.h
-
-	else
-	   $as_echo "#define CPU_CISC 1" >>confdefs.h
-
-	fi
 	;;
 esac
 
diff --git a/configure.in b/configure.in
index 33bd935..ce22b20 100644
--- a/configure.in
+++ b/configure.in
@@ -25,13 +25,8 @@
         AC_DEFINE(HAVE_X86, 1,
 	   [Define to use X86 inlined assembly code]);; 
 	* )
-	# CPU_RISC is only supported for big endian machines.
-	if test "$ac_cv_c_bigendian" = "yes"; then
 	   AC_DEFINE(CPU_RISC, 1,
 	    [Define if building for a RISC machine (assume slow byte access).])
-	else
-	   AC_DEFINE(CPU_CISC, 1)
-	fi
 	;;
 esac	
 
diff --git a/crypto/cipher/aes.c b/crypto/cipher/aes.c
index a279682..e91e525 100644
--- a/crypto/cipher/aes.c
+++ b/crypto/cipher/aes.c
@@ -1528,6 +1528,7 @@
 #ifdef CPU_RISC
     uint32_t tmp;
 
+#ifdef WORDS_BIGENDIAN
     tmp = expanded_key->round[i].v32[0];
     expanded_key->round[i].v32[0] = 
       U0[T4[(tmp >> 24)       ] & 0xff] ^ 
@@ -1555,6 +1556,36 @@
       U1[T4[(tmp >> 16) & 0xff] & 0xff] ^ 
       U2[T4[(tmp >> 8)  & 0xff] & 0xff] ^ 
       U3[T4[(tmp)       & 0xff] & 0xff];
+#else
+    tmp = expanded_key->round[i].v32[0];
+    expanded_key->round[i].v32[0] = 
+      U3[T4[(tmp >> 24)       ] & 0xff] ^ 
+      U2[T4[(tmp >> 16) & 0xff] & 0xff] ^ 
+      U1[T4[(tmp >> 8)  & 0xff] & 0xff] ^ 
+      U0[T4[(tmp)       & 0xff] & 0xff];
+
+    tmp = expanded_key->round[i].v32[1];
+    expanded_key->round[i].v32[1] = 
+      U3[T4[(tmp >> 24)       ] & 0xff] ^ 
+      U2[T4[(tmp >> 16) & 0xff] & 0xff] ^ 
+      U1[T4[(tmp >> 8)  & 0xff] & 0xff] ^ 
+      U0[T4[(tmp)       & 0xff] & 0xff];
+
+    tmp = expanded_key->round[i].v32[2];
+    expanded_key->round[i].v32[2] = 
+      U3[T4[(tmp >> 24)       ] & 0xff] ^ 
+      U2[T4[(tmp >> 16) & 0xff] & 0xff] ^ 
+      U1[T4[(tmp >> 8)  & 0xff] & 0xff] ^ 
+      U0[T4[(tmp)       & 0xff] & 0xff];
+
+    tmp = expanded_key->round[i].v32[3];
+    expanded_key->round[i].v32[3] = 
+      U3[T4[(tmp >> 24)       ] & 0xff] ^ 
+      U2[T4[(tmp >> 16) & 0xff] & 0xff] ^ 
+      U1[T4[(tmp >> 8)  & 0xff] & 0xff] ^ 
+      U0[T4[(tmp)       & 0xff] & 0xff];
+#endif /* WORDS_BIGENDIAN */
+
 #else /* assume CPU_CISC */
 
     uint32_t c0, c1, c2, c3;
@@ -1768,7 +1799,6 @@
      of state, using the tables U0, U1, U2, U3 */
 
 #ifdef WORDS_BIGENDIAN
-  /* FIX!  WRong indexes */
   column0 = U0[state->v32[0] >> 24] ^ U1[(state->v32[3] >> 16) & 0xff]
     ^ U2[(state->v32[2] >> 8) & 0xff] ^ U3[state->v32[1] & 0xff];
 
@@ -1781,17 +1811,17 @@
   column3 = U0[state->v32[3] >> 24] ^ U1[(state->v32[2] >> 16) & 0xff]
     ^ U2[(state->v32[1] >> 8) & 0xff] ^ U3[state->v32[0] & 0xff];
 #else
-  column0 = U0[state->v32[0] & 0xff] ^ U1[(state->v32[1] >> 8) & 0xff]
-	^ U2[(state->v32[2] >> 16) & 0xff] ^ U3[state->v32[3] >> 24];
+  column0 = U0[state->v32[0] & 0xff] ^ U1[(state->v32[3] >> 8) & 0xff]
+    ^ U2[(state->v32[2] >> 16) & 0xff] ^ U3[(state->v32[1] >> 24) & 0xff];
 
-  column1 = U0[state->v32[1] & 0xff] ^ U1[(state->v32[2] >> 8) & 0xff]
-	^ U2[(state->v32[3] >> 16) & 0xff] ^ U3[state->v32[0] >> 24];
+  column1 = U0[state->v32[1] & 0xff] ^ U1[(state->v32[0] >> 8) & 0xff]
+    ^ U2[(state->v32[3] >> 16) & 0xff] ^ U3[(state->v32[2] >> 24) & 0xff];
 
-  column2 = U0[state->v32[2] & 0xff] ^ U1[(state->v32[3] >> 8) & 0xff]
-	^ U2[(state->v32[0] >> 16) & 0xff] ^ U3[state->v32[1] >> 24];
+  column2 = U0[state->v32[2] & 0xff] ^ U1[(state->v32[1] >> 8) & 0xff]
+    ^ U2[(state->v32[0] >> 16) & 0xff] ^ U3[(state->v32[3] >> 24) & 0xff];
 
-  column3 = U0[state->v32[3] & 0xff] ^ U1[(state->v32[0] >> 8) & 0xff]
-	^ U2[(state->v32[1] >> 16) & 0xff] ^ U3[state->v32[2] >> 24];
+  column3 = U0[state->v32[3] & 0xff] ^ U1[(state->v32[2] >> 8) & 0xff]
+    ^ U2[(state->v32[1] >> 16) & 0xff] ^ U3[(state->v32[0] >> 24) & 0xff];
 #endif /* WORDS_BIGENDIAN */
 
   state->v32[0] = column0 ^ round_key->v32[0];
@@ -1805,6 +1835,7 @@
 aes_final_round(v128_t *state, const v128_t *round_key) {
   uint32_t tmp0, tmp1, tmp2, tmp3;
 
+#ifdef WORDS_BIGENDIAN
   tmp0 = (T4[(state->v32[0] >> 24)]        & 0xff000000) 
        ^ (T4[(state->v32[1] >> 16) & 0xff] & 0x00ff0000) 
        ^ (T4[(state->v32[2] >>  8) & 0xff] & 0x0000ff00) 
@@ -1828,6 +1859,31 @@
        ^ (T4[(state->v32[1] >>  8) & 0xff] & 0x0000ff00)
        ^ (T4[(state->v32[2]      ) & 0xff] & 0x000000ff)
        ^ round_key->v32[3];
+#else
+  tmp0 = (T4[(state->v32[3] >> 24)]        & 0xff000000) 
+       ^ (T4[(state->v32[2] >> 16) & 0xff] & 0x00ff0000) 
+       ^ (T4[(state->v32[1] >>  8) & 0xff] & 0x0000ff00) 
+       ^ (T4[(state->v32[0]      ) & 0xff] & 0x000000ff) 
+       ^ round_key->v32[0];
+
+  tmp1 = (T4[(state->v32[0] >> 24)]        & 0xff000000)
+       ^ (T4[(state->v32[3] >> 16) & 0xff] & 0x00ff0000)
+       ^ (T4[(state->v32[2] >>  8) & 0xff] & 0x0000ff00)
+       ^ (T4[(state->v32[1]      ) & 0xff] & 0x000000ff)
+       ^ round_key->v32[1];
+
+  tmp2 = (T4[(state->v32[1] >> 24)]        & 0xff000000)
+       ^ (T4[(state->v32[0] >> 16) & 0xff] & 0x00ff0000)
+       ^ (T4[(state->v32[3] >>  8) & 0xff] & 0x0000ff00)
+       ^ (T4[(state->v32[2]      ) & 0xff] & 0x000000ff)
+       ^ round_key->v32[2];
+
+  tmp3 = (T4[(state->v32[2] >> 24)]        & 0xff000000)
+       ^ (T4[(state->v32[1] >> 16) & 0xff] & 0x00ff0000)
+       ^ (T4[(state->v32[0] >>  8) & 0xff] & 0x0000ff00)
+       ^ (T4[(state->v32[3]      ) & 0xff] & 0x000000ff)
+       ^ round_key->v32[3];
+#endif /* WORDS_BIGENDIAN */
 
   state->v32[0] = tmp0;
   state->v32[1] = tmp1;
@@ -1840,6 +1896,7 @@
 aes_inv_final_round(v128_t *state, const v128_t *round_key) {
   uint32_t tmp0, tmp1, tmp2, tmp3;
 
+#ifdef WORDS_BIGENDIAN
   tmp0 = (U4[(state->v32[0] >> 24)]        & 0xff000000) 
        ^ (U4[(state->v32[3] >> 16) & 0xff] & 0x00ff0000) 
        ^ (U4[(state->v32[2] >>  8) & 0xff] & 0x0000ff00) 
@@ -1863,6 +1920,31 @@
        ^ (U4[(state->v32[1] >>  8) & 0xff] & 0x0000ff00)
        ^ (U4[(state->v32[0]      ) & 0xff] & 0x000000ff)
        ^ round_key->v32[3];
+#else
+  tmp0 = (U4[(state->v32[1] >> 24)]        & 0xff000000) 
+       ^ (U4[(state->v32[2] >> 16) & 0xff] & 0x00ff0000) 
+       ^ (U4[(state->v32[3] >>  8) & 0xff] & 0x0000ff00) 
+       ^ (U4[(state->v32[0]      ) & 0xff] & 0x000000ff) 
+       ^ round_key->v32[0];
+
+  tmp1 = (U4[(state->v32[2] >> 24)]        & 0xff000000)
+       ^ (U4[(state->v32[3] >> 16) & 0xff] & 0x00ff0000)
+       ^ (U4[(state->v32[0] >>  8) & 0xff] & 0x0000ff00)
+       ^ (U4[(state->v32[1]      ) & 0xff] & 0x000000ff)
+       ^ round_key->v32[1];
+
+  tmp2 = (U4[(state->v32[3] >> 24)]        & 0xff000000)
+       ^ (U4[(state->v32[0] >> 16) & 0xff] & 0x00ff0000)
+       ^ (U4[(state->v32[1] >>  8) & 0xff] & 0x0000ff00)
+       ^ (U4[(state->v32[2]      ) & 0xff] & 0x000000ff)
+       ^ round_key->v32[2];
+
+  tmp3 = (U4[(state->v32[0] >> 24)]        & 0xff000000)
+       ^ (U4[(state->v32[1] >> 16) & 0xff] & 0x00ff0000)
+       ^ (U4[(state->v32[2] >>  8) & 0xff] & 0x0000ff00)
+       ^ (U4[(state->v32[3]      ) & 0xff] & 0x000000ff)
+       ^ round_key->v32[3];
+#endif /* WORDS_BIGENDIAN */
 
   state->v32[0] = tmp0;
   state->v32[1] = tmp1;