ACPI: Create "idle=nomwait" bootparam

"idle=nomwait" disables the use of the MWAIT
instruction from both C1 (C1_FFH) and deeper (C2C3_FFH)
C-states.

When MWAIT is unavailable, the BIOS and OS generally
negotiate to use the HALT instruction for C1,
and use IO accesses for deeper C-states.

This option is useful for power and performance
comparisons, and also to work around BIOS bugs
where broken MWAIT support is advertised.

http://bugzilla.kernel.org/show_bug.cgi?id=10807
http://bugzilla.kernel.org/show_bug.cgi?id=10914

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Li Shaohua <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 65db7f4..5e497d1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -818,7 +818,7 @@
 			See Documentation/ide/ide.txt.
 
 	idle=		[X86]
-			Format: idle=poll or idle=mwait, idle=halt
+			Format: idle=poll or idle=mwait, idle=halt, idle=nomwait
 			Poll forces a polling idle loop that can slightly improves the performance
 			of waking up a idle CPU, but will use a lot of power and make the system
 			run hot. Not recommended.
@@ -828,6 +828,7 @@
 			as idle=poll.
 			idle=halt. Halt is forced to be used for CPU idle.
 			In such case C2/C3 won't be used again.
+			idle=nomwait. Disable mwait for CPU C-states.
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
 			Claim all unknown PCI IDE storage controllers.
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 612b3c4..3ab8373 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -57,6 +57,8 @@
 EXPORT_SYMBOL(boot_option_idle_override);
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
 
 void
 ia64_do_show_stack (struct unw_frame_info *info, void *arg)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc7294..4d629c6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -11,6 +11,8 @@
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -340,6 +342,15 @@
 		pm_idle = default_idle;
 		idle_halt = 1;
 		return 0;
+	} else if (!strcmp(str, "nomwait")) {
+		/*
+		 * The "idle=nomwait" boot option disables the use of
+		 * mwait for all CPU C-states (C1 as well as C2/C3).
+		 * Only idle_nomwait is set here; the variable
+		 * boot_option_idle_override is left untouched.
+		 */
+		idle_nomwait = 1;
+		return 0;
 	} else
 		return -1;
 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 9a803f8..4e1bb89 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -265,7 +265,20 @@
 
 	if (!pdc_in)
 		return status;
+	if (idle_nomwait) {
+		/*
+		 * If mwait is disabled for CPU C-states, clear both the
+		 * C2C3_FFH and the C1_FFH access mode bits in the
+		 * buffer that is passed as the parameter of _PDC.
+		 */
+		union acpi_object *obj;
+		u32 *buffer = NULL;
 
+		obj = pdc_in->pointer;
+		buffer = (u32 *)(obj->buffer.pointer);
+		buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
+
+	}
 	status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL);
 
 	if (ACPI_FAILURE(status))
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index c75c7ac..d592dbb 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -957,13 +957,17 @@
 			} else {
 				continue;
 			}
-			if (cx.type == ACPI_STATE_C1 && idle_halt) {
+			if (cx.type == ACPI_STATE_C1 &&
+					(idle_halt || idle_nomwait)) {
 				/*
 				 * In most cases the C1 space_id obtained from
 				 * _CST object is FIXED_HARDWARE access mode.
 				 * But when the option of idle=halt is added,
 				 * the entry_method type should be changed from
 				 * CSTATE_FFH to CSTATE_HALT.
+				 * When the option of idle=nomwait is added,
+				 * the C1 entry_method type should be
+				 * CSTATE_HALT.
 				 */
 				cx.entry_method = ACPI_CSTATE_HALT;
 				snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index f36e28a..f88fa05 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -764,6 +764,7 @@
 
 extern unsigned long boot_option_idle_override;
 extern unsigned long idle_halt;
+extern unsigned long idle_nomwait;
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index bc22162..55402d2 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -728,6 +728,7 @@
 
 extern unsigned long		boot_option_idle_override;
 extern unsigned long		idle_halt;
+extern unsigned long		idle_nomwait;
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);