tools/power turbostat: simplify output, add Avg_MHz

Use 8 columns for each number ouput.
We don't fit into 80 columns on most machines,
so keep the format simple.

Print frequency in MHz instead of GHz.
We've got 8 columns now, so use them to
show low frequency in a more natural unit.

Many users didn't understand what %c0 meant,
so re-name it to be %Busy.

Add Avg_MHz column, which is the frequency that many
users expect to see -- the total number of cycles executed
over the measurement interval.

People found the previous GHz to be confusing, since
it was the speed only over the non-idle interval.
That measurement has been re-named Bzy_MHz.

Suggested-by: Dirk J. Brandewie
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index b4ddb74..56bfb52 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -47,21 +47,22 @@
 .PP
 .SH FIELD DESCRIPTIONS
 .nf
-\fBpk\fP processor package number.
-\fBcor\fP processor core number.
+\fBPackage\fP processor package number.
+\fBCore\fP processor core number.
 \fBCPU\fP Linux CPU (logical processor) number.
 Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology.
-\fB%c0\fP percent of the interval that the CPU retired instructions.
-\fBGHz\fP average clock rate while the CPU was in c0 state.
-\fBTSC\fP average GHz that the TSC ran during the entire interval.
-\fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states.
-\fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
-\fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
-\fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states.
-\fBPkg_W\fP Watts consumed by the whole package.
-\fBCor_W\fP Watts consumed by the core part of the package.
-\fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors.
-\fBRAM_W\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
+\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
+\fB%Buzy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
+\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
+\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.
+\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
+\fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.
+\fBPkgWatt\fP Watts consumed by the whole package.
+\fBCorWatt\fP Watts consumed by the core part of the package.
+\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
+\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
 \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.
 \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
 .fi
@@ -78,29 +79,17 @@
 Subsequent rows show per-CPU statistics.
 
 .nf
-[root@sandy]# ./turbostat
-cor CPU    %c0  GHz  TSC    %c1    %c3    %c6    %c7 CTMP PTMP   %pc2   %pc3   %pc6   %pc7  Pkg_W  Cor_W GFX_W
-          0.06 0.80 2.29   0.11   0.00   0.00  99.83   47   40   0.26   0.01   0.44  98.78   3.49   0.12  0.14
-  0   0   0.07 0.80 2.29   0.07   0.00   0.00  99.86   40   40   0.26   0.01   0.44  98.78   3.49   0.12  0.14
-  0   4   0.03 0.80 2.29   0.12
-  1   1   0.04 0.80 2.29   0.25   0.01   0.00  99.71   40
-  1   5   0.16 0.80 2.29   0.13
-  2   2   0.05 0.80 2.29   0.06   0.01   0.00  99.88   40
-  2   6   0.03 0.80 2.29   0.08
-  3   3   0.05 0.80 2.29   0.08   0.00   0.00  99.87   47
-  3   7   0.04 0.84 2.29   0.09
-.fi
-.SH SUMMARY EXAMPLE
-The "-s" option prints the column headers just once,
-and then the one line system summary for each sample interval.
-
-.nf
-[root@wsm]# turbostat -S
-   %c0  GHz  TSC    %c1    %c3    %c6 CTMP   %pc3   %pc6
-  1.40 2.81 3.38  10.78  43.47  44.35   42  13.67   2.09
-  1.34 2.90 3.38  11.48  58.96  28.23   41  19.89   0.15
-  1.55 2.72 3.38  26.73  37.66  34.07   42   2.53   2.80
-  1.37 2.83 3.38  16.95  60.05  21.63   42   5.76   0.20
+[root@ivy]# ./turbostat
+    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 
+       -       -       6    0.36    1596    3492       0    0.59    0.01   99.04    0.00      23      24   23.82    0.01   72.47    0.00    6.40    1.01    0.00
+       0       0       9    0.58    1596    3492       0    0.28    0.01   99.13    0.00      23      24   23.82    0.01   72.47    0.00    6.40    1.01    0.00
+       0       4       1    0.07    1596    3492       0    0.79
+       1       1      10    0.65    1596    3492       0    0.59    0.00   98.76    0.00      23
+       1       5       5    0.28    1596    3492       0    0.95
+       2       2      10    0.66    1596    3492       0    0.41    0.01   98.92    0.00      23
+       2       6       2    0.10    1597    3492       0    0.97
+       3       3       3    0.20    1596    3492       0    0.44    0.00   99.37    0.00      23
+       3       7       5    0.31    1596    3492       0    0.33
 .fi
 .SH VERBOSE EXAMPLE
 The "-v" option adds verbosity to the output:
@@ -154,55 +143,35 @@
 until ^C while the other CPUs are mostly idle:
 
 .nf
-[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
+root@ivy: turbostat cat /dev/zero > /dev/null
 ^C
-cor CPU    %c0  GHz  TSC    %c1    %c3    %c6   %pc3   %pc6
-          8.86 3.61 3.38  15.06  31.19  44.89   0.00   0.00
-  0   0   1.46 3.22 3.38  16.84  29.48  52.22   0.00   0.00
-  0   6   0.21 3.06 3.38  18.09
-  1   2   0.53 3.33 3.38   2.80  46.40  50.27
-  1   8   0.89 3.47 3.38   2.44
-  2   4   1.36 3.43 3.38   9.04  23.71  65.89
-  2  10   0.18 2.86 3.38  10.22
-  8   1   0.04 2.87 3.38  99.96   0.01   0.00
-  8   7  99.72 3.63 3.38   0.27
-  9   3   0.31 3.21 3.38   7.64  56.55  35.50
-  9   9   0.08 2.95 3.38   7.88
- 10   5   1.42 3.43 3.38   2.14  30.99  65.44
- 10  11   0.16 2.88 3.38   3.40
+    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 
+       -       -     496   12.75    3886    3492       0   13.16    0.04   74.04    0.00      36      36    0.00    0.00    0.00    0.00   23.15   17.65    0.00
+       0       0      22    0.57    3830    3492       0    0.83    0.02   98.59    0.00      27      36    0.00    0.00    0.00    0.00   23.15   17.65    0.00
+       0       4       9    0.24    3829    3492       0    1.15
+       1       1       4    0.09    3783    3492       0   99.91    0.00    0.00    0.00      36
+       1       5    3880   99.82    3888    3492       0    0.18
+       2       2      17    0.44    3813    3492       0    0.77    0.04   98.75    0.00      28
+       2       6      12    0.32    3823    3492       0    0.89
+       3       3      16    0.43    3844    3492       0    0.63    0.11   98.84    0.00      30
+       3       7       4    0.11    3827    3492       0    0.94
+30.372243 sec
+
 .fi
-Above the cycle soaker drives cpu7 up its 3.6 GHz turbo limit
+Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit
 while the other processors are generally in various states of idle.
 
-Note that cpu1 and cpu7 are HT siblings within core8.
-As cpu7 is very busy, it prevents its sibling, cpu1,
+Note that cpu1 and cpu5 are HT siblings within core1.
+As cpu5 is very busy, it prevents its sibling, cpu1,
 from entering a c-state deeper than c1.
 
-Note that turbostat reports average GHz of 3.63, while
-the arithmetic average of the GHz column above is lower.
-This is a weighted average, where the weight is %c0.  ie. it is the total number of
-un-halted cycles elapsed per time divided by the number of CPUs.
-.SH SMI COUNTING EXAMPLE
-On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
-This counter is shown by default under the "SMI" column.
-.nf
-[root@x980 ~]# turbostat
-cor CPU    %c0  GHz  TSC SMI    %c1    %c3    %c6 CTMP   %pc3   %pc6
-          0.11 1.91 3.38   0   1.84   0.26  97.79   29   0.82  83.87
-  0   0   0.40 1.63 3.38   0  10.27   0.12  89.20   20   0.82  83.88
-  0   6   0.06 1.63 3.38   0  10.61
-  1   2   0.37 2.63 3.38   0   0.02   0.10  99.51   22
-  1   8   0.01 1.62 3.38   0   0.39
-  2   4   0.07 1.62 3.38   0   0.04   0.07  99.82   23
-  2  10   0.02 1.62 3.38   0   0.09
-  8   1   0.23 1.64 3.38   0   0.10   1.07  98.60   24
-  8   7   0.02 1.64 3.38   0   0.31
-  9   3   0.03 1.62 3.38   0   0.03   0.05  99.89   29
-  9   9   0.02 1.62 3.38   0   0.05
- 10   5   0.07 1.62 3.38   0   0.08   0.12  99.73   27
- 10  11   0.03 1.62 3.38   0   0.13
-^C
-.fi
+Note that the Avg_MHz column reflects the total number of cycles executed
+divided by the measurement interval.  If the %Busy column is 100%,
+then the processor was running at that speed the entire interval.
+The Avg_MHz multiplied by the %Busy results in the Bzy_MHz --
+which is the average frequency while the processor was executing --
+not including any non-busy idle time.
+
 .SH NOTES
 
 .B "turbostat "
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 77eb130..18dab6e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -56,7 +56,7 @@
 unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
-unsigned int units = 1000000000;	/* Ghz etc */
+unsigned int units = 1000000;	/* MHz etc */
 unsigned int genuine_intel;
 unsigned int has_invariant_tsc;
 unsigned int do_nehalem_platform_info;
@@ -264,88 +264,93 @@
 	return 0;
 }
 
+/*
+ * Example Format w/ field column widths:
+ *
+ * Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ * 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567
+ */
+
 void print_header(void)
 {
 	if (show_pkg)
-		outp += sprintf(outp, "pk");
-	if (show_pkg)
-		outp += sprintf(outp, " ");
+		outp += sprintf(outp, "Package ");
 	if (show_core)
-		outp += sprintf(outp, "cor");
+		outp += sprintf(outp, "    Core ");
 	if (show_cpu)
-		outp += sprintf(outp, " CPU");
-	if (show_pkg || show_core || show_cpu)
-		outp += sprintf(outp, " ");
-	if (do_nhm_cstates)
-		outp += sprintf(outp, "   %%c0");
+		outp += sprintf(outp, "    CPU ");
 	if (has_aperf)
-		outp += sprintf(outp, "  GHz");
-	outp += sprintf(outp, "  TSC");
+		outp += sprintf(outp, "Avg_MHz ");
+	if (do_nhm_cstates)
+		outp += sprintf(outp, "  %%Busy ");
+	if (has_aperf)
+		outp += sprintf(outp, "Bzy_MHz ");
+	outp += sprintf(outp, "TSC_MHz ");
 	if (do_smi)
-		outp += sprintf(outp, " SMI");
+		outp += sprintf(outp, "    SMI ");
 	if (extra_delta_offset32)
-		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
+		outp += sprintf(outp, " count 0x%03X ", extra_delta_offset32);
 	if (extra_delta_offset64)
-		outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
+		outp += sprintf(outp, " COUNT 0x%03X ", extra_delta_offset64);
 	if (extra_msr_offset32)
-		outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
+		outp += sprintf(outp, "  MSR 0x%03X ", extra_msr_offset32);
 	if (extra_msr_offset64)
-		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
+		outp += sprintf(outp, "          MSR 0x%03X ", extra_msr_offset64);
 	if (do_nhm_cstates)
-		outp += sprintf(outp, "    %%c1");
+		outp += sprintf(outp, " CPU%%c1 ");
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, "    %%c3");
+		outp += sprintf(outp, " CPU%%c3 ");
 	if (do_nhm_cstates)
-		outp += sprintf(outp, "    %%c6");
+		outp += sprintf(outp, " CPU%%c6 ");
 	if (do_snb_cstates)
-		outp += sprintf(outp, "    %%c7");
+		outp += sprintf(outp, " CPU%%c7 ");
 
 	if (do_dts)
-		outp += sprintf(outp, " CTMP");
+		outp += sprintf(outp, "CoreTmp ");
 	if (do_ptm)
-		outp += sprintf(outp, " PTMP");
+		outp += sprintf(outp, " PkgTmp ");
 
 	if (do_snb_cstates)
-		outp += sprintf(outp, "   %%pc2");
+		outp += sprintf(outp, "Pkg%%pc2 ");
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, "   %%pc3");
+		outp += sprintf(outp, "Pkg%%pc3 ");
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, "   %%pc6");
+		outp += sprintf(outp, "Pkg%%pc6 ");
 	if (do_snb_cstates)
-		outp += sprintf(outp, "   %%pc7");
+		outp += sprintf(outp, "Pkg%%pc7 ");
 	if (do_c8_c9_c10) {
-		outp += sprintf(outp, "   %%pc8");
-		outp += sprintf(outp, "   %%pc9");
-		outp += sprintf(outp, "  %%pc10");
+		outp += sprintf(outp, "Pkg%%pc8 ");
+		outp += sprintf(outp, "Pkg%%pc9 ");
+		outp += sprintf(outp, "Pk%%pc10 ");
 	}
 
 	if (do_rapl && !rapl_joules) {
 		if (do_rapl & RAPL_PKG)
-			outp += sprintf(outp, "  Pkg_W");
+			outp += sprintf(outp, "PkgWatt ");
 		if (do_rapl & RAPL_CORES)
-			outp += sprintf(outp, "  Cor_W");
+			outp += sprintf(outp, "CorWatt ");
 		if (do_rapl & RAPL_GFX)
-			outp += sprintf(outp, " GFX_W");
+			outp += sprintf(outp, "GFXWatt ");
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, " RAM_W");
+			outp += sprintf(outp, "RAMWatt ");
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
-			outp += sprintf(outp, " PKG_%%");
+			outp += sprintf(outp, "  PKG_%% ");
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
-			outp += sprintf(outp, " RAM_%%");
+			outp += sprintf(outp, "  RAM_%% ");
 	} else {
 		if (do_rapl & RAPL_PKG)
-			outp += sprintf(outp, "  Pkg_J");
+			outp += sprintf(outp, "  Pkg_J ");
 		if (do_rapl & RAPL_CORES)
-			outp += sprintf(outp, "  Cor_J");
+			outp += sprintf(outp, "  Cor_J ");
 		if (do_rapl & RAPL_GFX)
-			outp += sprintf(outp, " GFX_J");
+			outp += sprintf(outp, "  GFX_J ");
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, " RAM_W");
+			outp += sprintf(outp, "  RAM_W ");
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
-			outp += sprintf(outp, " PKG_%%");
+			outp += sprintf(outp, "  PKG_%% ");
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
-			outp += sprintf(outp, " RAM_%%");
-		outp += sprintf(outp, " time");
+			outp += sprintf(outp, "  RAM_%% ");
+		outp += sprintf(outp, "  time ");
 
 	}
 	outp += sprintf(outp, "\n");
@@ -410,25 +415,12 @@
 
 /*
  * column formatting convention & formats
- * package: "pk" 2 columns %2d
- * core: "cor" 3 columns %3d
- * CPU: "CPU" 3 columns %3d
- * Pkg_W: %6.2
- * Cor_W: %6.2
- * GFX_W: %5.2
- * RAM_W: %5.2
- * GHz: "GHz" 3 columns %3.2
- * TSC: "TSC" 3 columns %3.2
- * SMI: "SMI" 4 columns %4d
- * percentage " %pc3" %6.2
- * Perf Status percentage: %5.2
- * "CTMP" 4 columns %4d
  */
 int format_counters(struct thread_data *t, struct core_data *c,
 	struct pkg_data *p)
 {
 	double interval_float;
-	char *fmt5, *fmt6;
+	char *fmt8;
 
 	 /* if showing only 1st thread in core and this isn't one, bail out */
 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -443,65 +435,52 @@
 	/* topo columns, print blanks on 1st (average) line */
 	if (t == &average.threads) {
 		if (show_pkg)
-			outp += sprintf(outp, "  ");
-		if (show_pkg && show_core)
-			outp += sprintf(outp, " ");
+			outp += sprintf(outp, "       -");
 		if (show_core)
-			outp += sprintf(outp, "   ");
+			outp += sprintf(outp, "       -");
 		if (show_cpu)
-			outp += sprintf(outp, " " "   ");
+			outp += sprintf(outp, "       -");
 	} else {
 		if (show_pkg) {
 			if (p)
-				outp += sprintf(outp, "%2d", p->package_id);
+				outp += sprintf(outp, "%8d", p->package_id);
 			else
-				outp += sprintf(outp, "  ");
+				outp += sprintf(outp, "       -");
 		}
-		if (show_pkg && show_core)
-			outp += sprintf(outp, " ");
 		if (show_core) {
 			if (c)
-				outp += sprintf(outp, "%3d", c->core_id);
+				outp += sprintf(outp, "%8d", c->core_id);
 			else
-				outp += sprintf(outp, "   ");
+				outp += sprintf(outp, "       -");
 		}
 		if (show_cpu)
-			outp += sprintf(outp, " %3d", t->cpu_id);
+			outp += sprintf(outp, "%8d", t->cpu_id);
 	}
+
+	/* AvgMHz */
+	if (has_aperf)
+		outp += sprintf(outp, "%8.0f",
+			1.0 / units * t->aperf / interval_float);
+
 	/* %c0 */
 	if (do_nhm_cstates) {
-		if (show_pkg || show_core || show_cpu)
-			outp += sprintf(outp, " ");
 		if (!skip_c0)
-			outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc);
+			outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
 		else
-			outp += sprintf(outp, "  ****");
+			outp += sprintf(outp, "********");
 	}
 
-	/* GHz */
-	if (has_aperf) {
-		if (!aperf_mperf_unstable) {
-			outp += sprintf(outp, " %3.2f",
-				1.0 * t->tsc / units * t->aperf /
-				t->mperf / interval_float);
-		} else {
-			if (t->aperf > t->tsc || t->mperf > t->tsc) {
-				outp += sprintf(outp, " ***");
-			} else {
-				outp += sprintf(outp, "%3.1f*",
-					1.0 * t->tsc /
-					units * t->aperf /
-					t->mperf / interval_float);
-			}
-		}
-	}
+	/* BzyMHz */
+	if (has_aperf)
+		outp += sprintf(outp, "%8.0f",
+			1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
 
 	/* TSC */
-	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
+	outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
 
 	/* SMI */
 	if (do_smi)
-		outp += sprintf(outp, "%4d", t->smi_count);
+		outp += sprintf(outp, "%8d", t->smi_count);
 
 	/* delta */
 	if (extra_delta_offset32)
@@ -520,9 +499,9 @@
 
 	if (do_nhm_cstates) {
 		if (!skip_c1)
-			outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);
+			outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
 		else
-			outp += sprintf(outp, "  ****");
+			outp += sprintf(outp, "********");
 	}
 
 	/* print per-core data only for 1st thread in core */
@@ -530,79 +509,76 @@
 		goto done;
 
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
 	if (do_nhm_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
 	if (do_snb_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc);
 
 	if (do_dts)
-		outp += sprintf(outp, " %4d", c->core_temp_c);
+		outp += sprintf(outp, "%8d", c->core_temp_c);
 
 	/* print per-package data only for 1st core in package */
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		goto done;
 
 	if (do_ptm)
-		outp += sprintf(outp, " %4d", p->pkg_temp_c);
+		outp += sprintf(outp, "%8d", p->pkg_temp_c);
 
 	if (do_snb_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc);
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc);
 	if (do_snb_cstates)
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc);
 	if (do_c8_c9_c10) {
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc8/t->tsc);
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc9/t->tsc);
-		outp += sprintf(outp, " %6.2f", 100.0 * p->pc10/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc);
 	}
 
 	/*
  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
  	 * indicate that results are suspect by printing "**" in fraction place.
  	 */
-	if (interval_float < rapl_joule_counter_range) {
-		fmt5 = " %5.2f";
-		fmt6 = " %6.2f";
-	} else {
-		fmt5 = " %3.0f**";
-		fmt6 = " %4.0f**";
-	}
+	if (interval_float < rapl_joule_counter_range)
+		fmt8 = "%8.2f";
+	else
+		fmt8 = " %6.0f**";
 
 	if (do_rapl && !rapl_joules) {
 		if (do_rapl & RAPL_PKG)
-			outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float);
+			outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
 		if (do_rapl & RAPL_CORES)
-			outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float);
+			outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
 		if (do_rapl & RAPL_GFX)
-			outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float);
+			outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float);
+			outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float);
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
-			outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
-			outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
 	} else {
 		if (do_rapl & RAPL_PKG)
-			outp += sprintf(outp, fmt6,
+			outp += sprintf(outp, fmt8,
 					p->energy_pkg * rapl_energy_units);
 		if (do_rapl & RAPL_CORES)
-			outp += sprintf(outp, fmt6,
+			outp += sprintf(outp, fmt8,
 					p->energy_cores * rapl_energy_units);
 		if (do_rapl & RAPL_GFX)
-			outp += sprintf(outp, fmt5,
+			outp += sprintf(outp, fmt8,
 					p->energy_gfx * rapl_energy_units);
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, fmt5,
+			outp += sprintf(outp, fmt8,
 					p->energy_dram * rapl_energy_units);
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
-			outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
-			outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
-	outp += sprintf(outp, fmt5, interval_float);
+			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+	outp += sprintf(outp, fmt8, interval_float);
 
 	}
 done:
@@ -2455,7 +2431,7 @@
 	cmdline(argc, argv);
 
 	if (verbose)
-		fprintf(stderr, "turbostat v3.6 Dec 2, 2013"
+		fprintf(stderr, "turbostat v3.7 Feb 6, 2014"
 			" - Len Brown <lenb@kernel.org>\n");
 
 	turbostat_init();