random: mix in architectural randomness earlier in extract_buf()

Previously if CPU chip had a built-in random number generator (i.e.,
RDRAND on newer x86 chips), we mixed it in at the very end of
extract_buf() using an XOR operation.

We now mix it in right after the calculate a hash across the entire
pool.  This has the advantage that any contribution of entropy from
the CPU's HWRNG will get mixed back into the pool.  In addition, it
means that if the HWRNG has any defects (either accidentally or
maliciously introduced), this will be mitigated via the non-linear
transform of the SHA-1 hash function before we hand out generated
output.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 2d5daf9..54d0208 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -904,7 +904,7 @@
 	int i;
 	union {
 		__u32 w[5];
-		unsigned long l[LONGS(EXTRACT_SIZE)];
+		unsigned long l[LONGS(20)];
 	} hash;
 	__u32 workspace[SHA_WORKSPACE_WORDS];
 	__u8 extract[64];
@@ -917,6 +917,17 @@
 		sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);
 
 	/*
+	 * If we have a architectural hardware random number
+	 * generator, mix that in, too.
+	 */
+	for (i = 0; i < LONGS(20); i++) {
+		unsigned long v;
+		if (!arch_get_random_long(&v))
+			break;
+		hash.l[i] ^= v;
+	}
+
+	/*
 	 * We mix the hash back into the pool to prevent backtracking
 	 * attacks (where the attacker knows the state of the pool
 	 * plus the current outputs, and attempts to find previous
@@ -945,17 +956,6 @@
 	hash.w[1] ^= hash.w[4];
 	hash.w[2] ^= rol32(hash.w[2], 16);
 
-	/*
-	 * If we have a architectural hardware random number
-	 * generator, mix that in, too.
-	 */
-	for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
-		unsigned long v;
-		if (!arch_get_random_long(&v))
-			break;
-		hash.l[i] ^= v;
-	}
-
 	memcpy(out, &hash, EXTRACT_SIZE);
 	memset(&hash, 0, sizeof(hash));
 }