xen/PMU: Initialization code for Xen PMU
Map shared data structure that will hold CPU registers, VPMU context,
V/PCPU IDs of the CPU interrupted by PMU interrupt. Hypervisor fills
this information in its handler and passes it to the guest for further
processing.
Set up PMU VIRQ.
Now that perf infrastructure will assume that PMU is available on a PV
guest we need to be careful and make sure that accesses via RDPMC
instruction don't cause fatal traps by the hypervisor. Provide a nop
RDPMC handler.
For the same reason avoid issuing a warning on a write to APIC's LVTPC.
Both of these will be made functional in later patches.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 4b6e29a..e47e527 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o platform-pci-unplug.o \
- p2m.o apic.o
+ p2m.o apic.o pmu.o
obj-$(CONFIG_EVENT_TRACING) += trace.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 70e060a..d03ebfa 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -72,6 +72,9 @@
static void xen_apic_write(u32 reg, u32 val)
{
+ if (reg == APIC_LVTPC)
+ return;
+
/* Warn to see if there's any stray references */
WARN(1,"register: %x, value: %x\n", reg, val);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 373dbc9..19072f9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -84,6 +84,7 @@
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
+#include "pmu.h"
EXPORT_SYMBOL_GPL(hypercall_page);
@@ -1082,6 +1083,11 @@
return ret;
}
+unsigned long long xen_read_pmc(int counter)
+{
+ return 0;
+}
+
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1216,7 +1222,7 @@
.write_msr = xen_write_msr_safe,
.read_tsc = native_read_tsc,
- .read_pmc = native_read_pmc,
+ .read_pmc = xen_read_pmc,
.read_tscp = native_read_tscp,
@@ -1267,6 +1273,10 @@
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ xen_pmu_finish(cpu);
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
BUG();
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
new file mode 100644
index 0000000..1d1ae1b
--- /dev/null
+++ b/arch/x86/xen/pmu.c
@@ -0,0 +1,170 @@
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <asm/xen/hypercall.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+
+/* x86_pmu.handle_irq definition */
+#include "../kernel/cpu/perf_event.h"
+
+
+/* Shared page between hypervisor and domain */
+static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())
+
+/* perf callbacks */
+static int xen_is_in_guest(void)
+{
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ pr_warn_once("%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
+ return 0;
+
+ return 1;
+}
+
+static int xen_is_user_mode(void)
+{
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ pr_warn_once("%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
+ return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
+ else
+ return !!(xenpmu_data->pmu.r.regs.cpl & 3);
+}
+
+static unsigned long xen_get_guest_ip(void)
+{
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ pr_warn_once("%s: pmudata not initialized\n", __func__);
+ return 0;
+ }
+
+ return xenpmu_data->pmu.r.regs.ip;
+}
+
+static struct perf_guest_info_callbacks xen_guest_cbs = {
+ .is_in_guest = xen_is_in_guest,
+ .is_user_mode = xen_is_user_mode,
+ .get_guest_ip = xen_get_guest_ip,
+};
+
+/* Convert registers from Xen's format to Linux' */
+static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
+ struct pt_regs *regs, uint64_t pmu_flags)
+{
+ regs->ip = xen_regs->ip;
+ regs->cs = xen_regs->cs;
+ regs->sp = xen_regs->sp;
+
+ if (pmu_flags & PMU_SAMPLE_PV) {
+ if (pmu_flags & PMU_SAMPLE_USER)
+ regs->cs |= 3;
+ else
+ regs->cs &= ~3;
+ } else {
+ if (xen_regs->cpl)
+ regs->cs |= 3;
+ else
+ regs->cs &= ~3;
+ }
+}
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+{
+ int ret = IRQ_NONE;
+ struct pt_regs regs;
+ const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+ if (!xenpmu_data) {
+ pr_warn_once("%s: pmudata not initialized\n", __func__);
+ return ret;
+ }
+
+ xen_convert_regs(&xenpmu_data->pmu.r.regs, ®s,
+ xenpmu_data->pmu.pmu_flags);
+ if (x86_pmu.handle_irq(®s))
+ ret = IRQ_HANDLED;
+
+ return ret;
+}
+
+bool is_xen_pmu(int cpu)
+{
+ return (per_cpu(xenpmu_shared, cpu) != NULL);
+}
+
+void xen_pmu_init(int cpu)
+{
+ int err;
+ struct xen_pmu_params xp;
+ unsigned long pfn;
+ struct xen_pmu_data *xenpmu_data;
+
+ BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+
+ if (xen_hvm_domain())
+ return;
+
+ xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+ if (!xenpmu_data) {
+ pr_err("VPMU init: No memory\n");
+ return;
+ }
+ pfn = virt_to_pfn(xenpmu_data);
+
+ xp.val = pfn_to_mfn(pfn);
+ xp.vcpu = cpu;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+ err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
+ if (err)
+ goto fail;
+
+ per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+
+ if (cpu == 0)
+ perf_register_guest_info_callbacks(&xen_guest_cbs);
+
+ return;
+
+fail:
+ pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
+ cpu, err);
+ free_pages((unsigned long)xenpmu_data, 0);
+}
+
+void xen_pmu_finish(int cpu)
+{
+ struct xen_pmu_params xp;
+
+ if (xen_hvm_domain())
+ return;
+
+ xp.vcpu = cpu;
+ xp.version.maj = XENPMU_VER_MAJ;
+ xp.version.min = XENPMU_VER_MIN;
+
+ (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
+
+ free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
+ per_cpu(xenpmu_shared, cpu) = NULL;
+}
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
new file mode 100644
index 0000000..a76d2cf
--- /dev/null
+++ b/arch/x86/xen/pmu.h
@@ -0,0 +1,11 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+void xen_pmu_init(int cpu);
+void xen_pmu_finish(int cpu);
+bool is_xen_pmu(int cpu);
+
+#endif /* __XEN_PMU_H */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8648438..2a9ff73 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -26,6 +26,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@@ -38,6 +39,7 @@
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
+#include "pmu.h"
cpumask_var_t xen_cpu_initialized_map;
@@ -50,6 +52,7 @@
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -148,11 +151,18 @@
kfree(per_cpu(xen_irq_work, cpu).name);
per_cpu(xen_irq_work, cpu).name = NULL;
}
+
+ if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+ unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+ per_cpu(xen_pmu_irq, cpu).irq = -1;
+ kfree(per_cpu(xen_pmu_irq, cpu).name);
+ per_cpu(xen_pmu_irq, cpu).name = NULL;
+ }
};
static int xen_smp_intr_init(unsigned int cpu)
{
int rc;
- char *resched_name, *callfunc_name, *debug_name;
+ char *resched_name, *callfunc_name, *debug_name, *pmu_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -218,6 +228,18 @@
per_cpu(xen_irq_work, cpu).irq = rc;
per_cpu(xen_irq_work, cpu).name = callfunc_name;
+ if (is_xen_pmu(cpu)) {
+ pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+ rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+ xen_pmu_irq_handler,
+ IRQF_PERCPU|IRQF_NOBALANCING,
+ pmu_name, NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_pmu_irq, cpu).irq = rc;
+ per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+ }
+
return 0;
fail:
@@ -335,6 +357,8 @@
}
set_cpu_sibling_map(0);
+ xen_pmu_init(0);
+
if (xen_smp_intr_init(0))
BUG();
@@ -462,6 +486,8 @@
if (rc)
return rc;
+ xen_pmu_init(cpu);
+
rc = xen_smp_intr_init(cpu);
if (rc)
return rc;
@@ -503,6 +529,7 @@
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
+ xen_pmu_finish(cpu);
}
}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 53b4c08..feddabd 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -11,6 +11,7 @@
#include "xen-ops.h"
#include "mmu.h"
+#include "pmu.h"
static void xen_pv_pre_suspend(void)
{
@@ -67,16 +68,26 @@
void xen_arch_pre_suspend(void)
{
- if (xen_pv_domain())
- xen_pv_pre_suspend();
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ xen_pmu_finish(cpu);
+
+ if (xen_pv_domain())
+ xen_pv_pre_suspend();
}
void xen_arch_post_suspend(int cancelled)
{
- if (xen_pv_domain())
- xen_pv_post_suspend(cancelled);
- else
- xen_hvm_post_suspend(cancelled);
+ int cpu;
+
+ if (xen_pv_domain())
+ xen_pv_post_suspend(cancelled);
+ else
+ xen_hvm_post_suspend(cancelled);
+
+ for_each_online_cpu(cpu)
+ xen_pmu_init(cpu);
}
static void xen_vcpu_notify_restore(void *data)