calibrate: home in on correct lpj value more quickly Binary chop with a jiffy-resync on each step to find an upper bound is slow, so just race in a tight-ish loop to find an underestimate. If done with lots of individual steps, sometimes several hundred iterations would be required, which would impose a significant overhead, and make the initial estimate very low. By taking slowly increasing steps there will be less overhead. E.g. an x86_64 2.67GHz could have fitted in 613 individual small delays, but in reality should have been able to fit in a single delay 644 times longer, so underestimated by 31 steps. To reach the equivalent of 644 small delays with the accelerating scheme now requires about 130 iterations, so has <1/4 of the overhead, and can therefore be expected to underestimate by only 7 steps. As we now have a better initial estimate we can binary chop over a smaller range. With the loop overhead in the initial estimate kept low, and the step sizes moderate, we won't have under-estimated by much, so choose as tight a range as we can. Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Tested-by: Stephen Boyd <sboyd@codeaurora.org> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: 191e56880a6a638ce931859317f37deb084b6433 [log] [tgz]
author: Phil Carmody <ext-phil.2.carmody@nokia.com> Tue Mar 22 16:34:13 2011 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Tue Mar 22 17:44:11 2011 -0700
tree: 26853fa62983f12b85badda6b9ee2197c2f10697
parent: 71c696b1d0310da3ab8033d743282959bd49d28b [diff] [blame]
diff --git a/init/calibrate.c b/init/calibrate.c
index b71643a..f9000df 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c

@@ -110,8 +110,8 @@
 
 /*
  * This is the number of bits of precision for the loops_per_jiffy.  Each
- * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
- * better than 1%
+ * time we refine our estimate after the first takes 1.5/HZ seconds, so try
+ * to start with a good estimate.
  * For the boot cpu we can skip the delay calibration and assign it a value
  * calculated based on the timer frequency.
  * For the rest of the CPUs we cannot assume that the timer frequency is same as
@@ -121,38 +121,49 @@
 
 static unsigned long __cpuinit calibrate_delay_converge(void)
 {
-	unsigned long lpj, ticks, loopbit;
-	int lps_precision = LPS_PREC;
+	/* First stage - slowly accelerate to find initial bounds */
+	unsigned long lpj, ticks, loopadd, chop_limit;
+	int trials = 0, band = 0, trial_in_band = 0;
 
 	lpj = (1<<12);
-	while ((lpj <<= 1) != 0) {
-		/* wait for "start of" clock tick */
-		ticks = jiffies;
-		while (ticks == jiffies)
-			/* nothing */;
-		/* Go .. */
-		ticks = jiffies;
-		__delay(lpj);
-		ticks = jiffies - ticks;
-		if (ticks)
-			break;
-	}
+
+	/* wait for "start of" clock tick */
+	ticks = jiffies;
+	while (ticks == jiffies)
+		; /* nothing */
+	/* Go .. */
+	ticks = jiffies;
+	do {
+		if (++trial_in_band == (1<<band)) {
+			++band;
+			trial_in_band = 0;
+		}
+		__delay(lpj * band);
+		trials += band;
+	} while (ticks == jiffies);
+	/*
+	 * We overshot, so retreat to a clear underestimate. Then estimate
+	 * the largest likely undershoot. This defines our chop bounds.
+	 */
+	trials -= band;
+	loopadd = lpj * band;
+	lpj *= trials;
+	chop_limit = lpj >> (LPS_PREC + 1);
 
 	/*
 	 * Do a binary approximation to get lpj set to
-	 * equal one clock (up to lps_precision bits)
+	 * equal one clock (up to LPS_PREC bits)
 	 */
-	lpj >>= 1;
-	loopbit = lpj;
-	while (lps_precision-- && (loopbit >>= 1)) {
-		lpj |= loopbit;
+	while (loopadd > chop_limit) {
+		lpj += loopadd;
 		ticks = jiffies;
 		while (ticks == jiffies)
-			/* nothing */;
+			; /* nothing */
 		ticks = jiffies;
 		__delay(lpj);
 		if (jiffies != ticks)	/* longer than 1 tick */
-			lpj &= ~loopbit;
+			lpj -= loopadd;
+		loopadd >>= 1;
 	}
 
 	return lpj;
commit	191e56880a6a638ce931859317f37deb084b6433	[log] [tgz]
author	Phil Carmody <ext-phil.2.carmody@nokia.com>	Tue Mar 22 16:34:13 2011 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue Mar 22 17:44:11 2011 -0700
tree	26853fa62983f12b85badda6b9ee2197c2f10697
parent	71c696b1d0310da3ab8033d743282959bd49d28b [diff] [blame]