KVM: x2apic interface to lapic

This patch implements MSR interface to local apic as defines by x2apic
Intel specification.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 62ea2ab..683345a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -32,6 +32,7 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
+#include <asm/apicdef.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -158,6 +159,11 @@
 	apic_set_reg(apic, APIC_LVR, v);
 }
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -284,7 +290,12 @@
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 {
 	int result = 0;
-	u8 logical_id;
+	u32 logical_id;
+
+	if (apic_x2apic_mode(apic)) {
+		logical_id = apic_get_reg(apic, APIC_LDR);
+		return logical_id & mda;
+	}
 
 	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
 
@@ -477,7 +488,10 @@
 	irq.level = icr_low & APIC_INT_ASSERT;
 	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
 	irq.shorthand = icr_low & APIC_SHORT_MASK;
-	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+	if (apic_x2apic_mode(apic))
+		irq.dest_id = icr_high;
+	else
+		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
@@ -538,6 +552,12 @@
 		return 0;
 
 	switch (offset) {
+	case APIC_ID:
+		if (apic_x2apic_mode(apic))
+			val = kvm_apic_id(apic);
+		else
+			val = kvm_apic_id(apic) << 24;
+		break;
 	case APIC_ARBPRI:
 		printk(KERN_WARNING "Access APIC ARBPRI register "
 		       "which is for P6\n");
@@ -564,28 +584,26 @@
 	return container_of(dev, struct kvm_lapic, dev);
 }
 
-static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
+static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+		void *data)
 {
-	return apic_hw_enabled(apic) &&
-	    addr >= apic->base_address &&
-	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
-}
-
-static int apic_mmio_read(struct kvm_io_device *this,
-			  gpa_t address, int len, void *data)
-{
-	struct kvm_lapic *apic = to_lapic(this);
-	unsigned int offset = address - apic->base_address;
 	unsigned char alignment = offset & 0xf;
 	u32 result;
-	if (!apic_mmio_in_range(apic, address))
-		return -EOPNOTSUPP;
+	/* this bitmask has a bit cleared for each reserver register */
+	static const u64 rmask = 0x43ff01ffffffe70cULL;
 
 	if ((alignment + len) > 4) {
-		printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
-		       (unsigned long)address, len);
-		return 0;
+		printk(KERN_ERR "KVM_APIC_READ: alignment error %x %d\n",
+				offset, len);
+		return 1;
 	}
+
+	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
+		printk(KERN_ERR "KVM_APIC_READ: read reserved register %x\n",
+				offset);
+		return 1;
+	}
+
 	result = __apic_read(apic, offset & ~0xf);
 
 	trace_kvm_apic_read(offset, result);
@@ -604,6 +622,27 @@
 	return 0;
 }
 
+static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
+{
+	return apic_hw_enabled(apic) &&
+	    addr >= apic->base_address &&
+	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
+}
+
+static int apic_mmio_read(struct kvm_io_device *this,
+			   gpa_t address, int len, void *data)
+{
+	struct kvm_lapic *apic = to_lapic(this);
+	u32 offset = address - apic->base_address;
+
+	if (!apic_mmio_in_range(apic, address))
+		return -EOPNOTSUPP;
+
+	apic_reg_read(apic, offset, len, data);
+
+	return 0;
+}
+
 static void update_divide_count(struct kvm_lapic *apic)
 {
 	u32 tmp1, tmp2, tdcr;
@@ -657,42 +696,18 @@
 		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
 }
 
-static int apic_mmio_write(struct kvm_io_device *this,
-			   gpa_t address, int len, const void *data)
+static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
-	struct kvm_lapic *apic = to_lapic(this);
-	unsigned int offset = address - apic->base_address;
-	unsigned char alignment = offset & 0xf;
-	u32 val;
-	if (!apic_mmio_in_range(apic, address))
-		return -EOPNOTSUPP;
+	int ret = 0;
 
-	/*
-	 * APIC register must be aligned on 128-bits boundary.
-	 * 32/64/128 bits registers must be accessed thru 32 bits.
-	 * Refer SDM 8.4.1
-	 */
-	if (len != 4 || alignment) {
-		/* Don't shout loud, $infamous_os would cause only noise. */
-		apic_debug("apic write: bad size=%d %lx\n",
-			   len, (long)address);
-		return 0;
-	}
+	trace_kvm_apic_write(reg, val);
 
-	val = *(u32 *) data;
-
-	/* too common printing */
-	if (offset != APIC_EOI)
-		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __func__, offset, len, val);
-
-	offset &= 0xff0;
-
-	trace_kvm_apic_write(offset, val);
-
-	switch (offset) {
+	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
-		apic_set_reg(apic, APIC_ID, val);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_ID, val);
+		else
+			ret = 1;
 		break;
 
 	case APIC_TASKPRI:
@@ -705,11 +720,17 @@
 		break;
 
 	case APIC_LDR:
-		apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+		else
+			ret = 1;
 		break;
 
 	case APIC_DFR:
-		apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+		if (!apic_x2apic_mode(apic))
+			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+		else
+			ret = 1;
 		break;
 
 	case APIC_SPIV: {
@@ -739,7 +760,9 @@
 		break;
 
 	case APIC_ICR2:
-		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
+		if (!apic_x2apic_mode(apic))
+			val &= 0xff000000;
+		apic_set_reg(apic, APIC_ICR2, val);
 		break;
 
 	case APIC_LVT0:
@@ -753,8 +776,8 @@
 		if (!apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 
-		val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
-		apic_set_reg(apic, offset, val);
+		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
+		apic_set_reg(apic, reg, val);
 
 		break;
 
@@ -762,7 +785,7 @@
 		hrtimer_cancel(&apic->lapic_timer.timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
-		return 0;
+		break;
 
 	case APIC_TDCR:
 		if (val & 4)
@@ -771,11 +794,58 @@
 		update_divide_count(apic);
 		break;
 
+	case APIC_ESR:
+		if (apic_x2apic_mode(apic) && val != 0) {
+			printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
+			ret = 1;
+		}
+		break;
+
+	case APIC_SELF_IPI:
+		if (apic_x2apic_mode(apic)) {
+			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
+		} else
+			ret = 1;
+		break;
 	default:
-		apic_debug("Local APIC Write to read-only register %x\n",
-			   offset);
+		ret = 1;
 		break;
 	}
+	if (ret)
+		apic_debug("Local APIC Write to read-only register %x\n", reg);
+	return ret;
+}
+
+static int apic_mmio_write(struct kvm_io_device *this,
+			    gpa_t address, int len, const void *data)
+{
+	struct kvm_lapic *apic = to_lapic(this);
+	unsigned int offset = address - apic->base_address;
+	u32 val;
+
+	if (!apic_mmio_in_range(apic, address))
+		return -EOPNOTSUPP;
+
+	/*
+	 * APIC register must be aligned on 128-bits boundary.
+	 * 32/64/128 bits registers must be accessed thru 32 bits.
+	 * Refer SDM 8.4.1
+	 */
+	if (len != 4 || (offset & 0xf)) {
+		/* Don't shout loud, $infamous_os would cause only noise. */
+		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+		return;
+	}
+
+	val = *(u32*)data;
+
+	/* too common printing */
+	if (offset != APIC_EOI)
+		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
+			   "0x%x\n", __func__, offset, len, val);
+
+	apic_reg_write(apic, offset & 0xff0, val);
+
 	return 0;
 }
 
@@ -834,6 +904,11 @@
 		value &= ~MSR_IA32_APICBASE_BSP;
 
 	vcpu->arch.apic_base = value;
+	if (apic_x2apic_mode(apic)) {
+		u32 id = kvm_apic_id(apic);
+		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
+		apic_set_reg(apic, APIC_LDR, ldr);
+	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
@@ -1130,3 +1205,35 @@
 
 	vcpu->arch.apic->vapic_addr = vapic_addr;
 }
+
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = (msr - APIC_BASE_MSR) << 4;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+		return 1;
+
+	/* if this is ICR write vector before command */
+	if (msr == 0x830)
+		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+		return 1;
+
+	if (apic_reg_read(apic, reg, 4, &low))
+		return 1;
+	if (msr == 0x830)
+		apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+	*data = (((u64)high) << 32) | low;
+
+	return 0;
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index bc1c524..40010b0 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -46,4 +46,6 @@
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
 
+int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 086f9313..a50c832 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -867,6 +867,8 @@
 	case MSR_IA32_APICBASE:
 		kvm_set_apic_base(vcpu, data);
 		break;
+	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+		return kvm_x2apic_msr_write(vcpu, msr, data);
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
@@ -1065,6 +1067,9 @@
 	case MSR_IA32_APICBASE:
 		data = kvm_get_apic_base(vcpu);
 		break;
+	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+		return kvm_x2apic_msr_read(vcpu, msr, pdata);
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
@@ -1469,7 +1474,7 @@
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
 		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
 		0 /* Reserved, DCA */ | F(XMM4_1) |
-		F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
+		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		0 /* Reserved, XSAVE, OSXSAVE */;
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =