[MTD] Improve software ECC calculation

Unrolling the loops produces denser and much faster code.
Add a config switch which allows to select the byte order of the
resulting ecc code. The current Linux implementation has a byte
swap versus the SmartMedia specification

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 2d0ebad..4d235b91 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -23,6 +23,14 @@
 	  device thinks the write was successful, a bit could have been
 	  flipped accidentaly due to device wear or something else.
 
+config MTD_NAND_ECC_SMC
+	bool "NAND ECC Smart Media byte order"
+	depends on MTD_NAND
+	default n
+	help
+	  Software ECC according to the Smart Media Specification.
+	  The original Linux implementation had byte 0 and 1 swapped.
+
 config MTD_NAND_AUTCPU12
 	tristate "SmartMediaCard on autronix autcpu12 board"
 	depends on MTD_NAND && ARCH_AUTCPU12
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 1018929..2a163e4 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -7,6 +7,8 @@
  * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com)
  *                         Toshiba America Electronics Components, Inc.
  *
+ * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de>
+ *
  * $Id: nand_ecc.c,v 1.15 2005/11/07 11:14:30 gleixner Exp $
  *
  * This file is free software; you can redistribute it and/or modify it
@@ -63,87 +65,75 @@
 };
 
 /**
- * nand_trans_result - [GENERIC] create non-inverted ECC
- * @reg2:	line parity reg 2
- * @reg3:	line parity reg 3
- * @ecc_code:	ecc
- *
- * Creates non-inverted ECC code from line parity
- */
-static void nand_trans_result(u_char reg2, u_char reg3, u_char *ecc_code)
-{
-	u_char a, b, i, tmp1, tmp2;
-
-	/* Initialize variables */
-	a = b = 0x80;
-	tmp1 = tmp2 = 0;
-
-	/* Calculate first ECC byte */
-	for (i = 0; i < 4; i++) {
-		if (reg3 & a)	/* LP15,13,11,9 --> ecc_code[0] */
-			tmp1 |= b;
-		b >>= 1;
-		if (reg2 & a)	/* LP14,12,10,8 --> ecc_code[0] */
-			tmp1 |= b;
-		b >>= 1;
-		a >>= 1;
-	}
-
-	/* Calculate second ECC byte */
-	b = 0x80;
-	for (i = 0; i < 4; i++) {
-		if (reg3 & a)	/* LP7,5,3,1 --> ecc_code[1] */
-			tmp2 |= b;
-		b >>= 1;
-		if (reg2 & a)	/* LP6,4,2,0 --> ecc_code[1] */
-			tmp2 |= b;
-		b >>= 1;
-		a >>= 1;
-	}
-
-	/* Store two of the ECC bytes */
-	ecc_code[0] = tmp1;
-	ecc_code[1] = tmp2;
-}
-
-/**
- * nand_calculate_ecc - [NAND Interface] Calculate 3 byte ECC code for 256 byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3 byte ECC code
+ *			for 256 byte block
  * @mtd:	MTD block structure
  * @dat:	raw data
  * @ecc_code:	buffer for ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+		       u_char *ecc_code)
 {
-	u_char idx, reg1, reg2, reg3;
-	int j;
+	uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
+	int i;
 
 	/* Initialize variables */
 	reg1 = reg2 = reg3 = 0;
-	ecc_code[0] = ecc_code[1] = ecc_code[2] = 0;
 
 	/* Build up column parity */
-	for (j = 0; j < 256; j++) {
-
+	for(i = 0; i < 256; i++) {
 		/* Get CP0 - CP5 from table */
-		idx = nand_ecc_precalc_table[dat[j]];
+		idx = nand_ecc_precalc_table[*dat++];
 		reg1 ^= (idx & 0x3f);
 
 		/* All bit XOR = 1 ? */
 		if (idx & 0x40) {
-			reg3 ^= (u_char) j;
-			reg2 ^= ~((u_char) j);
+			reg3 ^= (uint8_t) i;
+			reg2 ^= ~((uint8_t) i);
 		}
 	}
 
 	/* Create non-inverted ECC code from line parity */
-	nand_trans_result(reg2, reg3, ecc_code);
+	tmp1  = (reg3 & 0x80) >> 0; /* B7 -> B7 */
+	tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
+	tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
+	tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
+	tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
+	tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
+	tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
+	tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
+
+	tmp2  = (reg3 & 0x08) << 4; /* B3 -> B7 */
+	tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
+	tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
+	tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
+	tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
+	tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
+	tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
+	tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
 
 	/* Calculate final ECC code */
-	ecc_code[0] = ~ecc_code[0];
-	ecc_code[1] = ~ecc_code[1];
+#ifdef CONFIG_NAND_ECC_SMC
+	ecc_code[0] = ~tmp2;
+	ecc_code[1] = ~tmp1;
+#else
+	ecc_code[0] = ~tmp1;
+	ecc_code[1] = ~tmp2;
+#endif
 	ecc_code[2] = ((~reg1) << 2) | 0x03;
+
 	return 0;
 }
+EXPORT_SYMBOL(nand_calculate_ecc);
+
+static inline int countbits(uint32_t byte)
+{
+	int res = 0;
+
+	for (;byte; byte >>= 1)
+		res += byte & 0x01;
+	return res;
+}
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
@@ -154,90 +144,54 @@
  *
  * Detect and correct a 1 bit error for 256 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, u_char *dat,
+		      u_char *read_ecc, u_char *calc_ecc)
 {
-	u_char a, b, c, d1, d2, d3, add, bit, i;
+	uint8_t s0, s1, s2;
 
-	/* Do error detection */
-	d1 = calc_ecc[0] ^ read_ecc[0];
-	d2 = calc_ecc[1] ^ read_ecc[1];
-	d3 = calc_ecc[2] ^ read_ecc[2];
-
-	if ((d1 | d2 | d3) == 0) {
-		/* No errors */
+#ifdef CONFIG_NAND_ECC_SMC
+	s0 = calc_ecc[0] ^ read_ecc[0];
+	s1 = calc_ecc[1] ^ read_ecc[1];
+	s2 = calc_ecc[2] ^ read_ecc[2];
+#else
+	s1 = calc_ecc[0] ^ read_ecc[0];
+	s0 = calc_ecc[1] ^ read_ecc[1];
+	s2 = calc_ecc[2] ^ read_ecc[2];
+#endif
+	if ((s0 | s1 | s2) == 0)
 		return 0;
-	} else {
-		a = (d1 ^ (d1 >> 1)) & 0x55;
-		b = (d2 ^ (d2 >> 1)) & 0x55;
-		c = (d3 ^ (d3 >> 1)) & 0x54;
 
-		/* Found and will correct single bit error in the data */
-		if ((a == 0x55) && (b == 0x55) && (c == 0x54)) {
-			c = 0x80;
-			add = 0;
-			a = 0x80;
-			for (i = 0; i < 4; i++) {
-				if (d1 & c)
-					add |= a;
-				c >>= 2;
-				a >>= 1;
-			}
-			c = 0x80;
-			for (i = 0; i < 4; i++) {
-				if (d2 & c)
-					add |= a;
-				c >>= 2;
-				a >>= 1;
-			}
-			bit = 0;
-			b = 0x04;
-			c = 0x80;
-			for (i = 0; i < 3; i++) {
-				if (d3 & c)
-					bit |= b;
-				c >>= 2;
-				b >>= 1;
-			}
-			b = 0x01;
-			a = dat[add];
-			a ^= (b << bit);
-			dat[add] = a;
-			return 1;
-		} else {
-			i = 0;
-			while (d1) {
-				if (d1 & 0x01)
-					++i;
-				d1 >>= 1;
-			}
-			while (d2) {
-				if (d2 & 0x01)
-					++i;
-				d2 >>= 1;
-			}
-			while (d3) {
-				if (d3 & 0x01)
-					++i;
-				d3 >>= 1;
-			}
-			if (i == 1) {
-				/* ECC Code Error Correction */
-				read_ecc[0] = calc_ecc[0];
-				read_ecc[1] = calc_ecc[1];
-				read_ecc[2] = calc_ecc[2];
-				return 2;
-			} else {
-				/* Uncorrectable Error */
-				return -1;
-			}
-		}
+	/* Check for a single bit error */
+	if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
+	    ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
+	    ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
+
+		uint32_t byteoffs, bitnum;
+
+		byteoffs = (s1 << 0) & 0x80;
+		byteoffs |= (s1 << 1) & 0x40;
+		byteoffs |= (s1 << 2) & 0x20;
+		byteoffs |= (s1 << 3) & 0x10;
+
+		byteoffs |= (s0 >> 4) & 0x08;
+		byteoffs |= (s0 >> 3) & 0x04;
+		byteoffs |= (s0 >> 2) & 0x02;
+		byteoffs |= (s0 >> 1) & 0x01;
+
+		bitnum = (s2 >> 5) & 0x04;
+		bitnum |= (s2 >> 4) & 0x02;
+		bitnum |= (s2 >> 3) & 0x01;
+
+		dat[byteoffs] ^= (1 << bitnum);
+
+		return 1;
 	}
 
-	/* Should never happen */
+	if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
+		return 1;
+
 	return -1;
 }
-
-EXPORT_SYMBOL(nand_calculate_ecc);
 EXPORT_SYMBOL(nand_correct_data);
 
 MODULE_LICENSE("GPL");