Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig
new file mode 100644
index 0000000..5ade198
--- /dev/null
+++ b/arch/i386/oprofile/Kconfig
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, including the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff --git a/arch/i386/oprofile/Makefile b/arch/i386/oprofile/Makefile
new file mode 100644
index 0000000..30f3eb3
--- /dev/null
+++ b/arch/i386/oprofile/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o  \
+		timer_int.o )
+
+oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_athlon.o \
+					   op_model_ppro.o op_model_p4.o
+oprofile-$(CONFIG_X86_IO_APIC)		+= nmi_timer_int.o
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
new file mode 100644
index 0000000..52d72e0
--- /dev/null
+++ b/arch/i386/oprofile/backtrace.c
@@ -0,0 +1,111 @@
+/**
+ * @file backtrace.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author David Smith
+ */
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+
+struct frame_head {
+	struct frame_head * ebp;
+	unsigned long ret;
+} __attribute__((packed));
+
+static struct frame_head *
+dump_backtrace(struct frame_head * head)
+{
+	oprofile_add_trace(head->ret);
+
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (head >= head->ebp)
+		return NULL;
+
+	return head->ebp;
+}
+
+/* check that the page(s) containing the frame head are present */
+static int pages_present(struct frame_head * head)
+{
+	struct mm_struct * mm = current->mm;
+
+	/* FIXME: only necessary once per page */
+	if (!check_user_page_readable(mm, (unsigned long)head))
+		return 0;
+
+	return check_user_page_readable(mm, (unsigned long)(head + 1));
+}
+
+/*
+ * |             | /\ Higher addresses
+ * |             |
+ * --------------- stack base (address of current_thread_info)
+ * | thread info |
+ * .             .
+ * |    stack    |
+ * --------------- saved regs->ebp value if valid (frame_head address)
+ * .             .
+ * --------------- struct pt_regs stored on stack (struct pt_regs *)
+ * |             |
+ * .             .
+ * |             |
+ * --------------- %esp
+ * |             |
+ * |             | \/ Lower addresses
+ *
+ * Thus, &pt_regs <-> stack base restricts the valid(ish) ebp values
+ */
+#ifdef CONFIG_FRAME_POINTER
+static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
+{
+	unsigned long headaddr = (unsigned long)head;
+	unsigned long stack = (unsigned long)regs;
+	unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
+
+	return headaddr > stack && headaddr < stack_base;
+}
+#else
+/* without fp, it's just junk */
+static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
+{
+	return 0;
+}
+#endif
+
+
+void
+x86_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct frame_head *head;
+
+#ifdef CONFIG_X86_64
+	head = (struct frame_head *)regs->rbp;
+#else
+	head = (struct frame_head *)regs->ebp;
+#endif
+
+	if (!user_mode(regs)) {
+		while (depth-- && valid_kernel_stack(head, regs))
+			head = dump_backtrace(head);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	if (!spin_trylock(&current->mm->page_table_lock))
+		return;
+#endif
+
+	while (depth-- && head && pages_present(head))
+		head = dump_backtrace(head);
+
+#ifdef CONFIG_SMP
+	spin_unlock(&current->mm->page_table_lock);
+#endif
+}
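
As an aside, the frame-pointer walk above can be exercised outside the kernel. The following stand-alone sketch (names such as walk_stack and MAX_DEPTH are invented for the example) assumes a GCC build with frame pointers kept (-fno-omit-frame-pointer) and uses __builtin_frame_address(0) instead of reading %ebp directly:

#include <stdio.h>

/* Same layout as the kernel's struct frame_head: the saved frame pointer
 * followed by the return address, as pushed by the function prologue. */
struct frame_head {
	struct frame_head *ebp;
	unsigned long ret;
} __attribute__((packed));

#define MAX_DEPTH 16	/* arbitrary cut-off for the example */

/* Walk the chain of saved frame pointers, applying the same sanity check
 * as dump_backtrace(): the chain must move towards higher addresses. */
static void walk_stack(struct frame_head *head)
{
	int depth;

	for (depth = 0; head && depth < MAX_DEPTH; ++depth) {
		printf("frame %2d: return address %#lx\n", depth, head->ret);
		if (head >= head->ebp)
			break;
		head = head->ebp;
	}
}

int main(void)
{
	/* GCC builtin: address of the current stack frame. */
	walk_stack(__builtin_frame_address(0));
	return 0;
}
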
diff --git a/arch/i386/oprofile/init.c b/arch/i386/oprofile/init.c
new file mode 100644
index 0000000..c90332d
--- /dev/null
+++ b/arch/i386/oprofile/init.c
@@ -0,0 +1,48 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+ 
+/* CPUs that have performance counters, such as the Pentium Pro, are
+ * supported with the NMI mode driver.
+ */
+ 
+extern int nmi_init(struct oprofile_operations * ops);
+extern int nmi_timer_init(struct oprofile_operations * ops);
+extern void nmi_exit(void);
+extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+	int ret;
+
+	ret = -ENODEV;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	ret = nmi_init(ops);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+	if (ret < 0)
+		ret = nmi_timer_init(ops);
+#endif
+	ops->backtrace = x86_backtrace;
+
+	return ret;
+}
+
+
+void oprofile_arch_exit(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	nmi_exit();
+#endif
+}
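
To make the control flow concrete, here is a small self-contained mock of the registration pattern used by oprofile_arch_init() above: the arch code fills in an ops table which the generic layer later drives. Everything below (struct mock_ops and the mock_* functions) is invented for illustration and is not the kernel's API.

#include <stdio.h>

/* Much-reduced, invented stand-in for struct oprofile_operations. */
struct mock_ops {
	int  (*setup)(void);
	int  (*start)(void);
	void (*stop)(void);
	void (*shutdown)(void);
	const char *cpu_type;
};

static int mock_setup(void)     { puts("setup: hook NMI, shadow the MSRs"); return 0; }
static int mock_start(void)     { puts("start: enable the counters"); return 0; }
static void mock_stop(void)     { puts("stop: disable the counters"); }
static void mock_shutdown(void) { puts("shutdown: unhook NMI, free the shadow"); }

/* Analogue of oprofile_arch_init()/nmi_init(): detect the CPU, pick a
 * model and fill in the ops table for the generic layer to use. */
static int mock_arch_init(struct mock_ops *ops)
{
	ops->setup    = mock_setup;
	ops->start    = mock_start;
	ops->stop     = mock_stop;
	ops->shutdown = mock_shutdown;
	ops->cpu_type = "i386/ppro";
	return 0;
}

int main(void)
{
	struct mock_ops ops;

	if (mock_arch_init(&ops))
		return 1;
	printf("cpu_type: %s\n", ops.cpu_type);
	ops.setup();
	ops.start();
	/* ... profiling would run here, samples arriving from NMIs ... */
	ops.stop();
	ops.shutdown();
	return 0;
}
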
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
new file mode 100644
index 0000000..3492d96
--- /dev/null
+++ b/arch/i386/oprofile/nmi_int.c
@@ -0,0 +1,427 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+ 
+static int nmi_start(void);
+static void nmi_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int nmi_enabled = 0;
+
+#ifdef CONFIG_PM
+
+static int nmi_suspend(struct sys_device *dev, u32 state)
+{
+	if (nmi_enabled == 1)
+		nmi_stop();
+	return 0;
+}
+
+
+static int nmi_resume(struct sys_device *dev)
+{
+	if (nmi_enabled == 1)
+		nmi_start();
+	return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+	set_kset_name("oprofile"),
+	.resume		= nmi_resume,
+	.suspend	= nmi_suspend,
+};
+
+
+static struct sys_device device_oprofile = {
+	.id	= 0,
+	.cls	= &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+	int error;
+	if (!(error = sysdev_class_register(&oprofile_sysclass)))
+		error = sysdev_register(&device_oprofile);
+	return error;
+}
+
+
+static void exit_driverfs(void)
+{
+	sysdev_unregister(&device_oprofile);
+	sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+
+static int nmi_callback(struct pt_regs * regs, int cpu)
+{
+	return model->check_ctrs(regs, &cpu_msrs[cpu]);
+}
+ 
+ 
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrs; ++i) {
+		rdmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrls; ++i) {
+		rdmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+}
+
+
+static void nmi_save_registers(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	model->fill_in_addresses(msrs);
+	nmi_cpu_save_registers(msrs);
+}
+
+
+static void free_msrs(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; ++i) {
+		kfree(cpu_msrs[i].counters);
+		cpu_msrs[i].counters = NULL;
+		kfree(cpu_msrs[i].controls);
+		cpu_msrs[i].controls = NULL;
+	}
+}
+
+
+static int allocate_msrs(void)
+{
+	int success = 1;
+	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+	int i;
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (!cpu_online(i))
+			continue;
+
+		cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
+		if (!cpu_msrs[i].counters) {
+			success = 0;
+			break;
+		}
+		cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
+		if (!cpu_msrs[i].controls) {
+			success = 0;
+			break;
+		}
+	}
+
+	if (!success)
+		free_msrs();
+
+	return success;
+}
+
+
+static void nmi_cpu_setup(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	spin_lock(&oprofilefs_lock);
+	model->setup_ctrs(msrs);
+	spin_unlock(&oprofilefs_lock);
+	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+
+static int nmi_setup(void)
+{
+	if (!allocate_msrs())
+		return -ENOMEM;
+
+	/* We need to be careful to install our NMI handler
+	 * without actually triggering any NMIs, as doing so
+	 * would break the core code horrifically.
+	 */
+	if (reserve_lapic_nmi() < 0) {
+		free_msrs();
+		return -EBUSY;
+	}
+	/* We need to serialize save and setup for HT because the subsets
+	 * of MSRs touched by the save and setup operations are distinct.
+	 */
+	on_each_cpu(nmi_save_registers, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+	set_nmi_callback(nmi_callback);
+	nmi_enabled = 1;
+	return 0;
+}
+
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrls; ++i) {
+		wrmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrs; ++i) {
+		wrmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+}
+ 
+
+static void nmi_cpu_shutdown(void * dummy)
+{
+	unsigned int v;
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+ 
+	/* Restoring APIC_LVTPC can trigger an APIC error because the delivery
+	 * mode and vector number combination can be illegal. That's by design:
+	 * on power-on the APIC LVT entries contain a zero vector number, which
+	 * is legal only for NMI delivery mode. So inhibit APIC errors before
+	 * restoring the LVTPC.
+	 */
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+	apic_write(APIC_LVTERR, v);
+	nmi_restore_registers(msrs);
+}
+
+ 
+static void nmi_shutdown(void)
+{
+	nmi_enabled = 0;
+	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+	unset_nmi_callback();
+	release_lapic_nmi();
+	free_msrs();
+}
+
+ 
+static void nmi_cpu_start(void * dummy)
+{
+	struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+	model->start(msrs);
+}
+ 
+
+static int nmi_start(void)
+{
+	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+	return 0;
+}
+ 
+ 
+static void nmi_cpu_stop(void * dummy)
+{
+	struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
+	model->stop(msrs);
+}
+ 
+ 
+static void nmi_stop(void)
+{
+	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+}
+
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int nmi_create_files(struct super_block * sb, struct dentry * root)
+{
+	unsigned int i;
+
+	for (i = 0; i < model->num_counters; ++i) {
+		struct dentry * dir;
+		char buf[2];
+ 
+		snprintf(buf, 2, "%d", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); 
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); 
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); 
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); 
+	}
+
+	return 0;
+}
+ 
+ 
+static int __init p4_init(char ** cpu_type)
+{
+	__u8 cpu_model = boot_cpu_data.x86_model;
+
+	if (cpu_model > 4)
+		return 0;
+
+#ifndef CONFIG_SMP
+	*cpu_type = "i386/p4";
+	model = &op_p4_spec;
+	return 1;
+#else
+	switch (smp_num_siblings) {
+		case 1:
+			*cpu_type = "i386/p4";
+			model = &op_p4_spec;
+			return 1;
+
+		case 2:
+			*cpu_type = "i386/p4-ht";
+			model = &op_p4_ht2_spec;
+			return 1;
+	}
+#endif
+
+	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
+	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
+	return 0;
+}
+
+
+static int __init ppro_init(char ** cpu_type)
+{
+	__u8 cpu_model = boot_cpu_data.x86_model;
+
+	if (cpu_model > 0xd)
+		return 0;
+
+	if (cpu_model == 9) {
+		*cpu_type = "i386/p6_mobile";
+	} else if (cpu_model > 5) {
+		*cpu_type = "i386/piii";
+	} else if (cpu_model > 2) {
+		*cpu_type = "i386/pii";
+	} else {
+		*cpu_type = "i386/ppro";
+	}
+
+	model = &op_ppro_spec;
+	return 1;
+}
+
+/* track whether we registered with driverfs so nmi_exit() can tear it down */
+static int using_nmi;
+
+int __init nmi_init(struct oprofile_operations *ops)
+{
+	__u8 vendor = boot_cpu_data.x86_vendor;
+	__u8 family = boot_cpu_data.x86;
+	char *cpu_type;
+
+	if (!cpu_has_apic)
+		return -ENODEV;
+ 
+	switch (vendor) {
+		case X86_VENDOR_AMD:
+			/* Needs to be at least an Athlon (or hammer in 32bit mode) */
+
+			switch (family) {
+			default:
+				return -ENODEV;
+			case 6:
+				model = &op_athlon_spec;
+				cpu_type = "i386/athlon";
+				break;
+			case 0xf:
+				model = &op_athlon_spec;
+				/* Actually it could be i386/hammer too, but give
+				   user space a consistent name. */
+				cpu_type = "x86-64/hammer";
+				break;
+			}
+			break;
+ 
+		case X86_VENDOR_INTEL:
+			switch (family) {
+				/* Pentium IV */
+				case 0xf:
+					if (!p4_init(&cpu_type))
+						return -ENODEV;
+					break;
+
+				/* A P6-class processor */
+				case 6:
+					if (!ppro_init(&cpu_type))
+						return -ENODEV;
+					break;
+
+				default:
+					return -ENODEV;
+			}
+			break;
+
+		default:
+			return -ENODEV;
+	}
+
+	init_driverfs();
+	using_nmi = 1;
+	ops->create_files = nmi_create_files;
+	ops->setup = nmi_setup;
+	ops->shutdown = nmi_shutdown;
+	ops->start = nmi_start;
+	ops->stop = nmi_stop;
+	ops->cpu_type = cpu_type;
+	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
+	return 0;
+}
+
+
+void nmi_exit(void)
+{
+	if (using_nmi)
+		exit_driverfs();
+}
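
nmi_create_files() above exposes each field of struct op_counter_config as a file inside a per-counter directory in oprofilefs. As a user-space illustration (assuming oprofilefs is mounted at its usual /dev/oprofile mount point; the helper name and the event/count numbers below are made up for the example, not part of this patch), configuring counter 0 could look roughly like this:

#include <stdio.h>

/* Illustrative only: write one value to a per-counter oprofilefs file. */
static int set_counter_param(int ctr, const char *name, unsigned long val)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/dev/oprofile/%d/%s", ctr, name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%lu\n", val);
	return fclose(f);
}

int main(void)
{
	/* One call per file created by nmi_create_files(); the event code
	 * and count are arbitrary example numbers. */
	set_counter_param(0, "enabled", 1);
	set_counter_param(0, "event", 0x79);
	set_counter_param(0, "count", 100000);
	set_counter_param(0, "unit_mask", 0);
	set_counter_param(0, "kernel", 1);
	set_counter_param(0, "user", 1);
	return 0;
}
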
diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c
new file mode 100644
index 0000000..b2e462a
--- /dev/null
+++ b/arch/i386/oprofile/nmi_timer_int.c
@@ -0,0 +1,55 @@
+/**
+ * @file nmi_timer_int.c
+ *
+ * @remark Copyright 2003 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Zwane Mwaikambo <zwane@linuxpower.ca>
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/oprofile.h>
+#include <linux/rcupdate.h>
+
+
+#include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/ptrace.h>
+ 
+static int nmi_timer_callback(struct pt_regs * regs, int cpu)
+{
+	oprofile_add_sample(regs, 0);
+	return 1;
+}
+
+static int timer_start(void)
+{
+	disable_timer_nmi_watchdog();
+	set_nmi_callback(nmi_timer_callback);
+	return 0;
+}
+
+
+static void timer_stop(void)
+{
+	enable_timer_nmi_watchdog();
+	unset_nmi_callback();
+	synchronize_kernel();
+}
+
+
+int __init nmi_timer_init(struct oprofile_operations * ops)
+{
+	extern int nmi_active;
+
+	if (nmi_active <= 0)
+		return -ENODEV;
+
+	ops->start = timer_start;
+	ops->stop = timer_stop;
+	ops->cpu_type = "timer";
+	printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
+	return 0;
+}
diff --git a/arch/i386/oprofile/op_counter.h b/arch/i386/oprofile/op_counter.h
new file mode 100644
index 0000000..2880b15
--- /dev/null
+++ b/arch/i386/oprofile/op_counter.h
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
new file mode 100644
index 0000000..3ad9a72
--- /dev/null
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -0,0 +1,149 @@
+/**
+ * @file op_model_athlon.c
+ * Athlon / K7 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void athlon_fill_in_addresses(struct op_msrs * const msrs)
+{
+	msrs->counters[0].addr = MSR_K7_PERFCTR0;
+	msrs->counters[1].addr = MSR_K7_PERFCTR1;
+	msrs->counters[2].addr = MSR_K7_PERFCTR2;
+	msrs->counters[3].addr = MSR_K7_PERFCTR3;
+
+	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+}
+
+ 
+static void athlon_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+ 
+	/* clear all counters */
+	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_CLEAR(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+	
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		CTR_WRITE(1, msrs, i);
+	}
+
+	/* enable active counters */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+
+			CTR_WRITE(counter_config[i].count, msrs, i);
+
+			CTRL_READ(low, high, msrs, i);
+			CTRL_CLEAR(low);
+			CTRL_SET_ENABLE(low);
+			CTRL_SET_USR(low, counter_config[i].user);
+			CTRL_SET_KERN(low, counter_config[i].kernel);
+			CTRL_SET_UM(low, counter_config[i].unit_mask);
+			CTRL_SET_EVENT(low, counter_config[i].event);
+			CTRL_WRITE(low, high, msrs, i);
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+ 
+static int athlon_check_ctrs(struct pt_regs * const regs,
+			     struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			oprofile_add_sample(regs, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+		}
+	}
+
+	/* See op_model_ppro.c */
+	return 1;
+}
+
+ 
+static void athlon_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (reset_value[i]) {
+			CTRL_READ(low, high, msrs, i);
+			CTRL_SET_ACTIVE(low);
+			CTRL_WRITE(low, high, msrs, i);
+		}
+	}
+}
+
+
+static void athlon_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	/* Subtle: stop on all counters to avoid race with
+	 * setting our pm callback */
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_SET_INACTIVE(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+}
+
+
+struct op_x86_model_spec const op_athlon_spec = {
+	.num_counters = NUM_COUNTERS,
+	.num_controls = NUM_CONTROLS,
+	.fill_in_addresses = &athlon_fill_in_addresses,
+	.setup_ctrs = &athlon_setup_ctrs,
+	.check_ctrs = &athlon_check_ctrs,
+	.start = &athlon_start,
+	.stop = &athlon_stop
+};
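
To see what athlon_setup_ctrs() and athlon_start() actually build, here is a self-contained sketch that composes an EVNTSEL low word with the same bit positions as the CTRL_SET_* macros above (repeated here, with extra parentheses, so the example compiles on its own). The event code 0x76 and the all-zero unit mask are arbitrary example values, not a recommendation.

#include <stdio.h>

#define CTRL_SET_ENABLE(val)   ((val) |= 1 << 20)
#define CTRL_SET_ACTIVE(val)   ((val) |= 1 << 22)
#define CTRL_SET_USR(val, u)   ((val) |= (((u) & 1) << 16))
#define CTRL_SET_KERN(val, k)  ((val) |= (((k) & 1) << 17))
#define CTRL_SET_UM(val, m)    ((val) |= ((m) << 8))
#define CTRL_SET_EVENT(val, e) ((val) |= (e))

int main(void)
{
	unsigned int low = 0;

	/* Compose an EVNTSEL value the way athlon_setup_ctrs() does for an
	 * enabled counter: enable bit, user/kernel bits, unit mask, event. */
	CTRL_SET_ENABLE(low);
	CTRL_SET_USR(low, 1);
	CTRL_SET_KERN(low, 1);
	CTRL_SET_UM(low, 0x00);
	CTRL_SET_EVENT(low, 0x76);	/* example event code only */

	printf("EVNTSEL low word: 0x%08x\n", low);

	/* athlon_start() then sets the active bit (bit 22) to let the
	 * counter run. */
	CTRL_SET_ACTIVE(low);
	printf("after start:      0x%08x\n", low);
	return 0;
}
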
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
new file mode 100644
index 0000000..ac8a066
--- /dev/null
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -0,0 +1,725 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+#include <asm/ptrace.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* this has to be checked dynamically since the
+   hyper-threadedness of a chip is discovered at
+   kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+	if (smp_num_siblings == 2)
+		num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static inline int addr_increment(void)
+{
+#ifdef CONFIG_SMP
+	return smp_num_siblings == 2 ? 2 : 1;
+#else
+	return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+	int virt_counter;
+	int counter_address;
+	int cccr_address;
+};
+
+struct p4_event_binding {
+	int escr_select;  /* value to put in CCCR */
+	int event_select; /* value to put in ESCR */
+	struct {
+		int virt_counter; /* for this counter... */
+		int escr_address; /* use this ESCR       */
+	} bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
+
+/* All the CCCRs we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
+	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
+	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
+	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
+	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+	
+	{ /* BRANCH_RETIRED */
+		0x05, 0x06, 
+		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+	
+	{ /* MISPRED_BRANCH_RETIRED */
+		0x04, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+	
+	{ /* TC_DELIVER_MODE */
+		0x01, 0x01,
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
+	},
+	
+	{ /* BPU_FETCH_REQUEST */
+		0x00, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+	},
+
+	{ /* ITLB_REFERENCE */
+		0x03, 0x18,
+		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+	},
+
+	{ /* MEMORY_CANCEL */
+		0x05, 0x02,
+		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+	},
+
+	{ /* MEMORY_COMPLETE */
+		0x02, 0x08,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* LOAD_PORT_REPLAY */
+		0x02, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* STORE_PORT_REPLAY */
+		0x02, 0x05,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* MOB_LOAD_REPLAY */
+		0x02, 0x03,
+		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+	},
+
+	{ /* PAGE_WALK_TYPE */
+		0x04, 0x01,
+		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+	},
+
+	{ /* BSQ_CACHE_REFERENCE */
+		0x07, 0x0c, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+	},
+
+	{ /* IOQ_ALLOCATION */
+		0x06, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* IOQ_ACTIVE_ENTRIES */
+		0x06, 0x1a, 
+		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+		  { 0, 0 } }
+	},
+
+	{ /* FSB_DATA_ACTIVITY */
+		0x06, 0x17, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+
+	{ /* BSQ_ALLOCATION */
+		0x07, 0x05, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* BSQ_ACTIVE_ENTRIES */
+		0x07, 0x06,
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		  { 0, 0 } }
+	},
+
+	{ /* X87_ASSIST */
+		0x05, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* SSE_INPUT_ASSIST */
+		0x01, 0x34,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_SP_UOP */
+		0x01, 0x08, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_DP_UOP */
+		0x01, 0x0c, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_SP_UOP */
+		0x01, 0x0a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_DP_UOP */
+		0x01, 0x0e,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* 64BIT_MMX_UOP */
+		0x01, 0x02, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* 128BIT_MMX_UOP */
+		0x01, 0x1a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* X87_FP_UOP */
+		0x01, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* X87_SIMD_MOVES_UOP */
+		0x01, 0x2e, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* MACHINE_CLEAR */
+		0x05, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* GLOBAL_POWER_EVENTS */
+		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+  
+	{ /* TC_MS_XFER */
+		0x00, 0x05, 
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* UOP_QUEUE_WRITES */
+		0x00, 0x09,
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* FRONT_END_EVENT */
+		0x05, 0x08,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* EXECUTION_EVENT */
+		0x05, 0x0c,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* REPLAY_EVENT */
+		0x05, 0x09,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* INSTR_RETIRED */
+		0x04, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOPS_RETIRED */
+		0x04, 0x01,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOP_TYPE */    
+		0x02, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+	},
+
+	{ /* RETIRED_MISPRED_BRANCH_TYPE */
+		0x02, 0x05, 
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	},
+
+	{ /* RETIRED_BRANCH_TYPE */
+		0x02, 0x04,
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	}
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+   the code in this module as an extra array offset, to select the "even"
+   or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+	int cpu = smp_processor_id();
+	return (cpu != first_cpu(cpu_sibling_map[cpu]));
+#endif	
+	return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter number to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+	unsigned int i; 
+	unsigned int addr, stag;
+
+	setup_num_counters();
+	stag = get_stagger();
+
+	/* the counter registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		msrs->counters[i].addr = 
+			p4_counters[VIRT_CTR(stag, i)].counter_address;
+	}
+
+	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+	/* 18 CCCR registers */
+	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	/* 43 ESCR registers in three or four discontiguous groups */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+
+	/* no IQ_ESCR0/1 on some models; we save BSU_ESCR0/1 a second time
+	 * to avoid a special case in nmi_{save|restore}_registers() */
+	if (boot_cpu_data.x86_model >= 0x3) {
+		for (addr = MSR_P4_BSU_ESCR0 + stag;
+		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	} else {
+		for (addr = MSR_P4_IQ_ESCR0 + stag;
+		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+
+	/* there are 2 remaining non-contiguously located ESCRs */
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		/* standard non-HT CPUs handle both remaining ESCRs*/
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else if (stag == 0) {
+		/* HT CPUs give the first remainder to the even thread, as
+		   the 32nd control register */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else {
+		/* and two copies of the second to the odd thread,
+		   for the 22nd and 23rd control registers */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+	}
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+	int i;
+	int const maxbind = 2;
+	unsigned int cccr = 0;
+	unsigned int escr = 0;
+	unsigned int high = 0;
+	unsigned int counter_bit;
+	struct p4_event_binding *ev = NULL;
+	unsigned int stag;
+
+	stag = get_stagger();
+	
+	/* convert from counter *number* to counter *bit* */
+	counter_bit = 1 << VIRT_CTR(stag, ctr);
+	
+	/* find our event binding structure. */
+	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+		printk(KERN_ERR 
+		       "oprofile: P4 event code 0x%lx out of range\n", 
+		       counter_config[ctr].event);
+		return;
+	}
+	
+	ev = &(p4_events[counter_config[ctr].event - 1]);
+	
+	for (i = 0; i < maxbind; i++) {
+		if (ev->bindings[i].virt_counter & counter_bit) {
+
+			/* modify ESCR */
+			ESCR_READ(escr, high, ev, i);
+			ESCR_CLEAR(escr);
+			if (stag == 0) {
+				ESCR_SET_USR_0(escr, counter_config[ctr].user);
+				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+			} else {
+				ESCR_SET_USR_1(escr, counter_config[ctr].user);
+				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+			}
+			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
+			ESCR_WRITE(escr, high, ev, i);
+		       
+			/* modify CCCR */
+			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+			CCCR_CLEAR(cccr);
+			CCCR_SET_REQUIRED_BITS(cccr);
+			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+			if (stag == 0) {
+				CCCR_SET_PMI_OVF_0(cccr);
+			} else {
+				CCCR_SET_PMI_OVF_1(cccr);
+			}
+			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+			return;
+		}
+	}
+
+	printk(KERN_ERR 
+	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+	       counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int i;
+	unsigned int low, high;
+	unsigned int addr;
+	unsigned int stag;
+
+	stag = get_stagger();
+
+	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+	if (! MISC_PMC_ENABLED_P(low)) {
+		printk(KERN_ERR "oprofile: P4 PMC not available\n");
+		return;
+	}
+
+	/* clear the cccrs we will use */
+	for (i = 0 ; i < num_counters ; i++) {
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+	}
+
+	/* clear cccrs outside our concern */
+	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+		rdmsr(p4_unused_cccr[i], low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_unused_cccr[i], low, high);
+	}
+
+	/* clear all escrs (including those outside our concern) */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	/* On older models, also clear MSR_P4_IQ_ESCR0/1 */
+	if (boot_cpu_data.x86_model < 0x3) {
+		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	} else if (stag == 0) {
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+	} else {
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	}		
+	
+	/* setup all counters */
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+			pmc_setup_one_p4_counter(i);
+			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+
+static int p4_check_ctrs(struct pt_regs * const regs,
+			 struct op_msrs const * const msrs)
+{
+	unsigned long ctr, low, high, stag, real;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		
+		if (!reset_value[i]) 
+			continue;
+
+		/* 
+		 * there is some eccentricity in the hardware which
+		 * requires that we perform 2 extra corrections:
+		 *
+		 * - check both the CCCR:OVF flag for overflow and the
+		 *   counter high bit for un-flagged overflows.
+		 *
+		 * - write the counter back twice to ensure it gets
+		 *   updated properly.
+		 * 
+		 * the former seems to be related to extra NMIs happening
+		 * during the current NMI; the latter is reported as errata
+		 * N15 in intel doc 249199-029, pentium 4 specification
+		 * update, though their suggested work-around does not
+		 * appear to solve the problem.
+		 */
+		
+		real = VIRT_CTR(stag, i);
+
+		CCCR_READ(low, high, real);
+ 		CTR_READ(ctr, high, real);
+		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+			oprofile_add_sample(regs, i);
+ 			CTR_WRITE(reset_value[i], real);
+			CCCR_CLEAR_OVF(low);
+			CCCR_WRITE(low, high, real);
+ 			CTR_WRITE(reset_value[i], real);
+		}
+	}
+
+	/* P4 quirk: you have to re-unmask the apic vector */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* See op_model_ppro.c */
+	return 1;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high, stag;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i])
+			continue;
+		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		CCCR_SET_ENABLE(low);
+		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+	}
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low, high, stag;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		CCCR_SET_DISABLE(low);
+		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+	}
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+	.num_counters = NUM_COUNTERS_HT2,
+	.num_controls = NUM_CONTROLS_HT2,
+	.fill_in_addresses = &p4_fill_in_addresses,
+	.setup_ctrs = &p4_setup_ctrs,
+	.check_ctrs = &p4_check_ctrs,
+	.start = &p4_start,
+	.stop = &p4_stop
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+	.num_counters = NUM_COUNTERS_NON_HT,
+	.num_controls = NUM_CONTROLS_NON_HT,
+	.fill_in_addresses = &p4_fill_in_addresses,
+	.setup_ctrs = &p4_setup_ctrs,
+	.check_ctrs = &p4_check_ctrs,
+	.start = &p4_start,
+	.stop = &p4_stop
+};
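
The stagger/VIRT_CTR() scheme above maps each hyperthread's logical counter index onto its half of the physical p4_counters[] slots: the even sibling (stagger 0) owns the first NUM_COUNTERS_HT2 slots, the odd sibling (stagger 1) the rest. A tiny stand-alone sketch of that mapping, with the constants copied from above and an output format invented for illustration:

#include <stdio.h>

#define NUM_COUNTERS_HT2 4
#define VIRT_CTR(stagger, i) ((i) + (NUM_COUNTERS_HT2 * (stagger)))

int main(void)
{
	unsigned int stag, i;

	/* Print which physical p4_counters[] slot each sibling's logical
	 * counter index lands on. */
	for (stag = 0; stag < 2; ++stag)
		for (i = 0; i < NUM_COUNTERS_HT2; ++i)
			printf("stagger %u, counter %u -> physical slot %u\n",
			       stag, i, VIRT_CTR(stag, i));
	return 0;
}
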
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
new file mode 100644
index 0000000..d719015
--- /dev/null
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -0,0 +1,143 @@
+/**
+ * @file op_model_ppro.c
+ * Pentium Pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+	msrs->counters[0].addr = MSR_P6_PERFCTR0;
+	msrs->counters[1].addr = MSR_P6_PERFCTR1;
+	
+	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	/* clear all counters */
+	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_CLEAR(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+	
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		CTR_WRITE(1, msrs, i);
+	}
+
+	/* enable active counters */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+
+			CTR_WRITE(counter_config[i].count, msrs, i);
+
+			CTRL_READ(low, high, msrs, i);
+			CTRL_CLEAR(low);
+			CTRL_SET_ENABLE(low);
+			CTRL_SET_USR(low, counter_config[i].user);
+			CTRL_SET_KERN(low, counter_config[i].kernel);
+			CTRL_SET_UM(low, counter_config[i].unit_mask);
+			CTRL_SET_EVENT(low, counter_config[i].event);
+			CTRL_WRITE(low, high, msrs, i);
+		}
+	}
+}
+
+ 
+static int ppro_check_ctrs(struct pt_regs * const regs,
+			   struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+ 
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			oprofile_add_sample(regs, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+		}
+	}
+
+	/* Only the P6-based Pentium M needs to re-unmask the APIC vector, but
+	 * it doesn't hurt the other P6 variants */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* We can't work out whether we really handled an interrupt. We
+	 * might have caught a *second* counter just after it overflowed;
+	 * when the interrupt for this counter then arrives, we don't find
+	 * a counter that has overflowed, so we would return 0 and get
+	 * dazed + confused. Instead we always assume we found an
+	 * overflow. This sucks.
+	 */
+	return 1;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	CTRL_READ(low, high, msrs, 0);
+	CTRL_SET_ACTIVE(low);
+	CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	CTRL_READ(low, high, msrs, 0);
+	CTRL_SET_INACTIVE(low);
+	CTRL_WRITE(low, high, msrs, 0);
+}
+
+
+struct op_x86_model_spec const op_ppro_spec = {
+	.num_counters = NUM_COUNTERS,
+	.num_controls = NUM_CONTROLS,
+	.fill_in_addresses = &ppro_fill_in_addresses,
+	.setup_ctrs = &ppro_setup_ctrs,
+	.check_ctrs = &ppro_check_ctrs,
+	.start = &ppro_start,
+	.stop = &ppro_stop
+};
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h
new file mode 100644
index 0000000..123b7e9
--- /dev/null
+++ b/arch/i386/oprofile/op_x86_model.h
@@ -0,0 +1,50 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+struct op_saved_msr {
+	unsigned int high;
+	unsigned int low;
+};
+
+struct op_msr {
+	unsigned long addr;
+	struct op_saved_msr saved;
+};
+
+struct op_msrs {
+	struct op_msr * counters;
+	struct op_msr * controls;
+};
+
+struct pt_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU models' perfctr support.
+ */
+struct op_x86_model_spec {
+	unsigned int const num_counters;
+	unsigned int const num_controls;
+	void (*fill_in_addresses)(struct op_msrs * const msrs);
+	void (*setup_ctrs)(struct op_msrs const * const msrs);
+	int (*check_ctrs)(struct pt_regs * const regs,
+		struct op_msrs const * const msrs);
+	void (*start)(struct op_msrs const * const msrs);
+	void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */