Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6: (29 commits)
  [IA64] BUG to BUG_ON changes
  [IA64] Fix typo/thinko in arch/ia64/sn/kernel/sn2/sn2_smp.c
  ia64: remove some warnings.
  ia64/xen: fix the link error.
  ia64/pv_ops/bp/xen: implemented binary patchable pv_cpu_ops.
  ia64/pv_ops/binary patch: define paravirt_dv_serialize_data() and suppress false positive warning.
  ia64/pv_ops/bp/module: support binary patching for kernel module.
  ia64/pv_ops: implement binary patching optimization for native.
  ia64/pv_op/binarypatch: add helper functions to support binary patching for paravirt_ops.
  ia64/pv_ops/xen/gate.S: xen gate page paravirtualization
  ia64/pv_ops: paravirtualize gate.S.
  ia64/pv_ops: move down __kernel_syscall_via_epc.
  ia64/pv_ops/xen: define xen specific gate page.
  ia64/pv_ops: gate page paravirtualization.
  ia64/pv_ops/xen/pv_time_ops: implement sched_clock.
  ia64/pv_ops/pv_time_ops: add sched_clock hook.
  ia64/pv_ops/xen: paravirtualize read/write ar.itc and ar.itm
  ia64/pv_ops: paravirtualize mov = ar.itc.
  ia64/pv_ops/pvchecker: support mov = ar.itc paravirtualization
  ia64/pv_ops: paravirtualize fsys.S.
  ...
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
index c47830e..111ed52 100644
--- a/arch/ia64/include/asm/intrinsics.h
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -202,7 +202,11 @@
 
 #ifndef __ASSEMBLY__
 #if defined(CONFIG_PARAVIRT) && defined(__KERNEL__)
-#define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
+#ifdef ASM_SUPPORTED
+# define IA64_INTRINSIC_API(name)	paravirt_ ## name
+#else
+# define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
+#endif
 #define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
 #else
 #define IA64_INTRINSIC_API(name)	ia64_native_ ## name
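
With this change, which flavor of an intrinsic a caller gets is decided at compile time: when the toolchain supports the inline-assembly stubs (ASM_SUPPORTED), the intrinsic expands to a binary-patchable paravirt_* function; otherwise it degrades to an indirect call through pv_cpu_ops. A rough sketch of the resulting dispatch, using a hypothetical caller of the fc (flush cache) intrinsic:

    /* Illustration only: what IA64_INTRINSIC_API(fc) resolves to. */
    static void flush_line(void *addr)
    {
    #if defined(CONFIG_PARAVIRT) && defined(__KERNEL__)
    # ifdef ASM_SUPPORTED
    	paravirt_fc(addr);	/* patchable stub; may be rewritten in place */
    # else
    	pv_cpu_ops.fc(addr);	/* plain indirect call through the ops table */
    # endif
    #else
    	ia64_native_fc(addr);	/* native instruction, no indirection */
    #endif
    }
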
diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h
index 040bc87..7f2a456 100644
--- a/arch/ia64/include/asm/mmu_context.h
+++ b/arch/ia64/include/asm/mmu_context.h
@@ -87,7 +87,7 @@
 	/* re-check, now that we've got the lock: */
 	context = mm->context;
 	if (context == 0) {
-		cpus_clear(mm->cpu_vm_mask);
+		cpumask_clear(mm_cpumask(mm));
 		if (ia64_ctx.next >= ia64_ctx.limit) {
 			ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
 					ia64_ctx.max_ctx, ia64_ctx.next);
@@ -166,8 +166,8 @@
 
 	do {
 		context = get_mmu_context(mm);
-		if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
-			cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+		if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
+			cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
 		reload_context(context);
 		/*
 		 * in the unlikely event of a TLB-flush by another thread,
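
The cpumask changes in this file follow the kernel-wide migration from value-style cpumask_t operations to pointer-based accessors; mm_cpumask() hides the layout of the mask inside struct mm_struct. A minimal before/after sketch of the idiom, assuming a struct mm_struct *mm in scope:

    /* old style: pokes the cpumask_t member directly */
    if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
    	cpu_set(smp_processor_id(), mm->cpu_vm_mask);

    /* new style: mm_cpumask() yields a struct cpumask *, which keeps
     * working if the mask ever becomes variable-sized */
    if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
    	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
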
diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h
index d2da61e..908eaef 100644
--- a/arch/ia64/include/asm/module.h
+++ b/arch/ia64/include/asm/module.h
@@ -16,6 +16,12 @@
 	struct elf64_shdr *got;		/* global offset table */
 	struct elf64_shdr *opd;		/* official procedure descriptors */
 	struct elf64_shdr *unwind;	/* unwind-table section */
+#ifdef CONFIG_PARAVIRT
+	struct elf64_shdr *paravirt_bundles;
+					/* paravirt_alt_bundle_patch table */
+	struct elf64_shdr *paravirt_insts;
+					/* paravirt_alt_inst_patch table */
+#endif
 	unsigned long gp;		/* global-pointer for module */
 
 	void *core_unw_table;		/* core unwind-table cookie returned by unwinder */
diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
index 0a1026c..d2d46ef 100644
--- a/arch/ia64/include/asm/native/inst.h
+++ b/arch/ia64/include/asm/native/inst.h
@@ -30,6 +30,9 @@
 #define __paravirt_work_processed_syscall_target \
 						ia64_work_processed_syscall
 
+#define paravirt_fsyscall_table			ia64_native_fsyscall_table
+#define paravirt_fsys_bubble_down		ia64_native_fsys_bubble_down
+
 #ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK
 # define PARAVIRT_POISON	0xdeadbeefbaadf00d
 # define CLOBBER(clob)				\
@@ -74,6 +77,11 @@
 (pred)	mov reg = psr			\
 	CLOBBER(clob)
 
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+(pred)	mov reg = ar.itc				\
+	CLOBBER(clob)					\
+	CLOBBER_PRED(pred_clob)
+
 #define MOV_TO_IFA(reg, clob)	\
 	mov cr.ifa = reg	\
 	CLOBBER(clob)
@@ -158,6 +166,11 @@
 #define RSM_PSR_DT		\
 	rsm psr.dt
 
+#define RSM_PSR_BE_I(clob0, clob1)	\
+	rsm psr.be | psr.i		\
+	CLOBBER(clob0)			\
+	CLOBBER(clob1)
+
 #define SSM_PSR_DT_AND_SRLZ_I	\
 	ssm psr.dt		\
 	;;			\
diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h
new file mode 100644
index 0000000..be16ca9
--- /dev/null
+++ b/arch/ia64/include/asm/native/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist		\
+	__ia64_native_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist			\
+	__ia64_native_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist	\
+	__ia64_native_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist	\
+	__ia64_native_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist			\
+	__ia64_native_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist			\
+	__ia64_native_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist		\
+	__ia64_native_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist		\
+	__ia64_native_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h
index b8e6eb1..8d72962 100644
--- a/arch/ia64/include/asm/native/pvchk_inst.h
+++ b/arch/ia64/include/asm/native/pvchk_inst.h
@@ -180,6 +180,11 @@
 	IS_PRED_IN(pred)			\
 	IS_RREG_OUT(reg)			\
 	IS_RREG_CLOB(clob)
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+	IS_PRED_IN(pred)				\
+	IS_PRED_CLOB(pred_clob)				\
+	IS_RREG_OUT(reg)				\
+	IS_RREG_CLOB(clob)
 #define MOV_TO_IFA(reg, clob)			\
 	IS_RREG_IN(reg)				\
 	IS_RREG_CLOB(clob)
@@ -246,6 +251,9 @@
 	IS_RREG_CLOB(clob2)
 #define RSM_PSR_DT				\
 	nop 0
+#define RSM_PSR_BE_I(clob0, clob1)		\
+	IS_RREG_CLOB(clob0)			\
+	IS_RREG_CLOB(clob1)
 #define SSM_PSR_DT_AND_SRLZ_I			\
 	nop 0
 #define BSW_0(clob0, clob1, clob2)		\
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 2bf3636..2eb0a98 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -22,6 +22,56 @@
 #ifndef __ASM_PARAVIRT_H
 #define __ASM_PARAVIRT_H
 
+#ifndef __ASSEMBLY__
+/******************************************************************************
+ * fsys related addresses
+ */
+struct pv_fsys_data {
+	unsigned long *fsyscall_table;
+	void *fsys_bubble_down;
+};
+
+extern struct pv_fsys_data pv_fsys_data;
+
+unsigned long *paravirt_get_fsyscall_table(void);
+char *paravirt_get_fsys_bubble_down(void);
+
+/******************************************************************************
+ * patchlist addresses for gate page
+ */
+enum pv_gate_patchlist {
+	PV_GATE_START_FSYSCALL,
+	PV_GATE_END_FSYSCALL,
+
+	PV_GATE_START_BRL_FSYS_BUBBLE_DOWN,
+	PV_GATE_END_BRL_FSYS_BUBBLE_DOWN,
+
+	PV_GATE_START_VTOP,
+	PV_GATE_END_VTOP,
+
+	PV_GATE_START_MCKINLEY_E9,
+	PV_GATE_END_MCKINLEY_E9,
+};
+
+struct pv_patchdata {
+	unsigned long start_fsyscall_patchlist;
+	unsigned long end_fsyscall_patchlist;
+	unsigned long start_brl_fsys_bubble_down_patchlist;
+	unsigned long end_brl_fsys_bubble_down_patchlist;
+	unsigned long start_vtop_patchlist;
+	unsigned long end_vtop_patchlist;
+	unsigned long start_mckinley_e9_patchlist;
+	unsigned long end_mckinley_e9_patchlist;
+
+	void *gate_section;
+};
+
+extern struct pv_patchdata pv_patchdata;
+
+unsigned long paravirt_get_gate_patchlist(enum pv_gate_patchlist type);
+void *paravirt_get_gate_section(void);
+#endif
+
 #ifdef CONFIG_PARAVIRT_GUEST
 
 #define PARAVIRT_HYPERVISOR_TYPE_DEFAULT	0
@@ -68,6 +118,14 @@
 	int (*arch_setup_nomca)(void);
 
 	void (*post_smp_prepare_boot_cpu)(void);
+
+#ifdef ASM_SUPPORTED
+	unsigned long (*patch_bundle)(void *sbundle, void *ebundle,
+				      unsigned long type);
+	unsigned long (*patch_inst)(unsigned long stag, unsigned long etag,
+				    unsigned long type);
+#endif
+	void (*patch_branch)(unsigned long tag, unsigned long type);
 };
 
 extern struct pv_init_ops pv_init_ops;
@@ -210,6 +268,8 @@
 	int (*do_steal_accounting)(unsigned long *new_itm);
 
 	void (*clocksource_resume)(void);
+
+	unsigned long long (*sched_clock)(void);
 };
 
 extern struct pv_time_ops pv_time_ops;
@@ -227,6 +287,11 @@
 	return pv_time_ops.do_steal_accounting(new_itm);
 }
 
+static inline unsigned long long paravirt_sched_clock(void)
+{
+	return pv_time_ops.sched_clock();
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #else
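
The new sched_clock hook lets a guest substitute its own notion of scheduling time, while paravirt_sched_clock() gives callers a stable entry point. A hedged sketch of how a hypervisor port might wire it up (guest_sched_clock and guest_time_init are illustrative names, not part of this patch):

    /* Illustration: a guest installs its clock during early init. */
    static unsigned long long guest_sched_clock(void)
    {
    	/* e.g. a virtualized cycle counter; details are guest-specific */
    	return ia64_native_sched_clock();
    }

    void __init guest_time_init(void)
    {
    	pv_time_ops.sched_clock = guest_sched_clock;
    }
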
diff --git a/arch/ia64/include/asm/paravirt_patch.h b/arch/ia64/include/asm/paravirt_patch.h
new file mode 100644
index 0000000..128ff5d
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt_patch.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __ASM_PARAVIRT_PATCH_H
+#define __ASM_PARAVIRT_PATCH_H
+
+#ifdef __ASSEMBLY__
+
+	.section .paravirt_branches, "a"
+	.previous
+#define PARAVIRT_PATCH_SITE_BR(type)		\
+	{					\
+	[1:] ;					\
+	br.cond.sptk.many 2f ;			\
+	nop.b 0 ;				\
+	nop.b 0;; ;				\
+	} ;					\
+	2:					\
+	.xdata8 ".paravirt_branches", 1b, type
+
+#else
+
+#include <linux/stringify.h>
+#include <asm/intrinsics.h>
+
+/* for binary patch */
+struct paravirt_patch_site_bundle {
+	void		*sbundle;
+	void		*ebundle;
+	unsigned long	type;
+};
+
+/* the label marks the beginning of a new bundle */
+#define paravirt_alt_bundle(instr, privop)				\
+	"\t998:\n"							\
+	"\t" instr "\n"							\
+	"\t999:\n"							\
+	"\t.pushsection .paravirt_bundles, \"a\"\n"			\
+	"\t.popsection\n"						\
+	"\t.xdata8 \".paravirt_bundles\", 998b, 999b, "			\
+	__stringify(privop) "\n"
+
+
+struct paravirt_patch_bundle_elem {
+	const void	*sbundle;
+	const void	*ebundle;
+	unsigned long	type;
+};
+
+
+struct paravirt_patch_site_inst {
+	unsigned long	stag;
+	unsigned long	etag;
+	unsigned long	type;
+};
+
+#define paravirt_alt_inst(instr, privop)				\
+	"\t[998:]\n"							\
+	"\t" instr "\n"							\
+	"\t[999:]\n"							\
+	"\t.pushsection .paravirt_insts, \"a\"\n"			\
+	"\t.popsection\n"						\
+	"\t.xdata8 \".paravirt_insts\", 998b, 999b, "			\
+	__stringify(privop) "\n"
+
+struct paravirt_patch_site_branch {
+	unsigned long	tag;
+	unsigned long	type;
+};
+
+struct paravirt_patch_branch_target {
+	const void	*entry;
+	unsigned long	type;
+};
+
+void
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries);
+
+void
+paravirt_patch_reloc_br(unsigned long tag, const void *target);
+
+void
+paravirt_patch_reloc_brl(unsigned long tag, const void *target);
+
+
+#if defined(ASM_SUPPORTED) && defined(CONFIG_PARAVIRT)
+unsigned long
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+
+unsigned long
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found);
+
+void
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end);
+
+void
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end);
+
+void paravirt_patch_apply(void);
+#else
+#define paravirt_patch_apply_bundle(start, end)	do { } while (0)
+#define paravirt_patch_apply_inst(start, end)	do { } while (0)
+#define paravirt_patch_apply()			do { } while (0)
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_PARAVIRT_PATCH_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
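
The patch-site structures above describe (start, end, type) triples that the assembler-side annotations collect into the .paravirt_bundles and .paravirt_insts sections; at boot (or module load) the patcher walks those tables and lets pv_init_ops decide how each site is rewritten. A simplified sketch of the driving loop, consistent with the declarations above but not the literal implementation (which also flushes the icache over the rewritten range):

    void
    paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
    			    const struct paravirt_patch_site_bundle *end)
    {
    	const struct paravirt_patch_site_bundle *p;

    	for (p = start; p < end; p++)
    		if (pv_init_ops.patch_bundle)
    			(*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle,
    						    p->type);
    }
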
diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h
index 33c8e55..3d29511 100644
--- a/arch/ia64/include/asm/paravirt_privop.h
+++ b/arch/ia64/include/asm/paravirt_privop.h
@@ -33,7 +33,7 @@
  */
 
 struct pv_cpu_ops {
-	void (*fc)(unsigned long addr);
+	void (*fc)(void *addr);
 	unsigned long (*thash)(unsigned long addr);
 	unsigned long (*get_cpuid)(int index);
 	unsigned long (*get_pmd)(int index);
@@ -60,12 +60,18 @@
 /* Instructions paravirtualized for performance */
 /************************************************/
 
+#ifndef ASM_SUPPORTED
+#define paravirt_ssm_i()	pv_cpu_ops.ssm_i()
+#define paravirt_rsm_i()	pv_cpu_ops.rsm_i()
+#define __paravirt_getreg()	pv_cpu_ops.getreg()
+#endif
+
 /* The mask for ia64_native_ssm/rsm() must be a constant ("i" constraint);
  * a static inline function can't guarantee that. */
 #define paravirt_ssm(mask)			\
 	do {					\
 		if ((mask) == IA64_PSR_I)	\
-			pv_cpu_ops.ssm_i();	\
+			paravirt_ssm_i();	\
 		else				\
 			ia64_native_ssm(mask);	\
 	} while (0)
@@ -73,7 +79,7 @@
 #define paravirt_rsm(mask)			\
 	do {					\
 		if ((mask) == IA64_PSR_I)	\
-			pv_cpu_ops.rsm_i();	\
+			paravirt_rsm_i();	\
 		else				\
 			ia64_native_rsm(mask);	\
 	} while (0)
@@ -86,7 +92,7 @@
 		if ((reg) == _IA64_REG_IP)			\
 			res = ia64_native_getreg(_IA64_REG_IP); \
 		else						\
-			res = pv_cpu_ops.getreg(reg);		\
+			res = __paravirt_getreg(reg);		\
 		res;						\
 	})
 
@@ -112,6 +118,12 @@
 
 #endif /* CONFIG_PARAVIRT */
 
+#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED)
+#define paravirt_dv_serialize_data()	ia64_dv_serialize_data()
+#else
+#define paravirt_dv_serialize_data()	/* nothing */
+#endif
+
 /* these routines utilize privilege-sensitive or performance-sensitive
  * privileged instructions so the code must be replaced with
  * paravirtualized versions */
@@ -121,4 +133,349 @@
 	IA64_PARAVIRT_ASM_FUNC(work_processed_syscall)
 #define ia64_leave_kernel		IA64_PARAVIRT_ASM_FUNC(leave_kernel)
 
+
+#if defined(CONFIG_PARAVIRT)
+/******************************************************************************
+ * binary patching infrastructure
+ */
+#define PARAVIRT_PATCH_TYPE_FC				1
+#define PARAVIRT_PATCH_TYPE_THASH			2
+#define PARAVIRT_PATCH_TYPE_GET_CPUID			3
+#define PARAVIRT_PATCH_TYPE_GET_PMD			4
+#define PARAVIRT_PATCH_TYPE_PTCGA			5
+#define PARAVIRT_PATCH_TYPE_GET_RR			6
+#define PARAVIRT_PATCH_TYPE_SET_RR			7
+#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4		8
+#define PARAVIRT_PATCH_TYPE_SSM_I			9
+#define PARAVIRT_PATCH_TYPE_RSM_I			10
+#define PARAVIRT_PATCH_TYPE_GET_PSR_I			11
+#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE	12
+
+/* PARAVIRT_PATCH_TYPE_[GS]ETREG + _IA64_REG_xxx */
+#define PARAVIRT_PATCH_TYPE_GETREG			0x10000000
+#define PARAVIRT_PATCH_TYPE_SETREG			0x20000000
+
+/*
+ * struct task_struct* (*ia64_switch_to)(void* next_task);
+ * void *ia64_leave_syscall;
+ * void *ia64_work_processed_syscall
+ * void *ia64_leave_kernel;
+ */
+
+#define PARAVIRT_PATCH_TYPE_BR_START			0x30000000
+#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 0)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 1)
+#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL	\
+	(PARAVIRT_PATCH_TYPE_BR_START + 2)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL		\
+	(PARAVIRT_PATCH_TYPE_BR_START + 3)
+
+#ifdef ASM_SUPPORTED
+#include <asm/paravirt_patch.h>
+
+/*
+ * pv_cpu_ops calling stub.
+ * the normal function-call convention can't be expressed in gcc
+ * inline assembly.
+ *
+ * from the caller's point of view,
+ * the following registers will be clobbered.
+ * r2, r3
+ * r8-r15
+ * r16, r17
+ * b6, b7
+ * p6-p15
+ * ar.ccv
+ *
+ * from the callee's point of view,
+ * the following registers can be used.
+ * r2, r3: scratch
+ * r8: scratch, input argument0 and return value
+ * r9-r15: scratch, input arguments 1-5
+ * b6: return pointer
+ * b7: scratch
+ * p6-p15: scratch
+ * ar.ccv: scratch
+ *
+ * other registers must not be changed; in particular:
+ * b0 (rp): preserved.  gcc ignores b0 in the clobber list.
+ * r16: saved gp
+ */
+/* 5 bundles */
+#define __PARAVIRT_BR							\
+	";;\n"								\
+	"{ .mlx\n"							\
+	"nop 0\n"							\
+	"movl r2 = %[op_addr]\n"/* get function pointer address */	\
+	";;\n"								\
+	"}\n"								\
+	"1:\n"								\
+	"{ .mii\n"							\
+	"ld8 r2 = [r2]\n"	/* load function descriptor address */	\
+	"mov r17 = ip\n"	/* get ip to calc return address */	\
+	"mov r16 = gp\n"	/* save gp */				\
+	";;\n"								\
+	"}\n"								\
+	"{ .mii\n"							\
+	"ld8 r3 = [r2], 8\n"	/* load entry address */		\
+	"adds r17 =  1f - 1b, r17\n"	/* calculate return address */	\
+	";;\n"								\
+	"mov b7 = r3\n"		/* set entry address */			\
+	"}\n"								\
+	"{ .mib\n"							\
+	"ld8 gp = [r2]\n"	/* load gp value */			\
+	"mov b6 = r17\n"	/* set return address */		\
+	"br.cond.sptk.few b7\n"	/* intrinsics are very short isns */	\
+	"}\n"								\
+	"1:\n"								\
+	"{ .mii\n"							\
+	"mov gp = r16\n"	/* restore gp value */			\
+	"nop 0\n"							\
+	"nop 0\n"							\
+	";;\n"								\
+	"}\n"
+
+#define PARAVIRT_OP(op)				\
+	[op_addr] "i"(&pv_cpu_ops.op)
+
+#define PARAVIRT_TYPE(type)			\
+	PARAVIRT_PATCH_TYPE_ ## type
+
+#define PARAVIRT_REG_CLOBBERS0					\
+	"r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14",	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS1					\
+	"r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14",		\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS2					\
+	"r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14",	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS5					\
+	"r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/	\
+		"r15", "r16", "r17"
+
+#define PARAVIRT_BR_CLOBBERS			\
+	"b6", "b7"
+
+#define PARAVIRT_PR_CLOBBERS						\
+	"p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
+
+#define PARAVIRT_AR_CLOBBERS			\
+	"ar.ccv"
+
+#define PARAVIRT_CLOBBERS0			\
+		PARAVIRT_REG_CLOBBERS0,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS1			\
+		PARAVIRT_REG_CLOBBERS1,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS2			\
+		PARAVIRT_REG_CLOBBERS2,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_CLOBBERS5			\
+		PARAVIRT_REG_CLOBBERS5,		\
+		PARAVIRT_BR_CLOBBERS,		\
+		PARAVIRT_PR_CLOBBERS,		\
+		PARAVIRT_AR_CLOBBERS,		\
+		"memory"
+
+#define PARAVIRT_BR0(op, type)					\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op)				\
+		      : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR0_RET(op, type)				\
+	register unsigned long ia64_intri_res asm ("r8");	\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      : "=r"(ia64_intri_res)			\
+		      : PARAVIRT_OP(op)				\
+		      : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR1(op, type, arg1)				\
+	register unsigned long __##arg1 asm ("r8") = arg1;	\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_RET(op, type, arg1)			\
+	register unsigned long ia64_intri_res asm ("r8");	\
+	register unsigned long __##arg1 asm ("r8") = arg1;	\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      : "=r"(ia64_intri_res)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_VOID(op, type, arg1)			\
+	register void *__##arg1 asm ("r8") = arg1;		\
+	register unsigned long ia64_clobber asm ("r8");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+					  PARAVIRT_TYPE(type))	\
+		      :	"=r"(ia64_clobber)			\
+		      : PARAVIRT_OP(op), "0"(__##arg1)		\
+		      : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR2(op, type, arg1, arg2)				\
+	register unsigned long __##arg1 asm ("r8") = arg1;		\
+	register unsigned long __##arg2 asm ("r9") = arg2;		\
+	register unsigned long ia64_clobber1 asm ("r8");		\
+	register unsigned long ia64_clobber2 asm ("r9");		\
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,		\
+					  PARAVIRT_TYPE(type))		\
+		      : "=r"(ia64_clobber1), "=r"(ia64_clobber2)	\
+		      : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2)	\
+		      : PARAVIRT_CLOBBERS2)
+
+
+#define PARAVIRT_DEFINE_CPU_OP0(op, type)		\
+	static inline void				\
+	paravirt_ ## op (void)				\
+	{						\
+		PARAVIRT_BR0(op, type);			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type)		\
+	static inline unsigned long			\
+	paravirt_ ## op (void)				\
+	{						\
+		PARAVIRT_BR0_RET(op, type);		\
+		return ia64_intri_res;			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1_VOID(op, type)		\
+	static inline void				\
+	paravirt_ ## op (void *arg1)			\
+	{						\
+		PARAVIRT_BR1_VOID(op, type, arg1);	\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1(op, type)		\
+	static inline void				\
+	paravirt_ ## op (unsigned long arg1)		\
+	{						\
+		PARAVIRT_BR1(op, type, arg1);		\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type)		\
+	static inline unsigned long			\
+	paravirt_ ## op (unsigned long arg1)		\
+	{						\
+		PARAVIRT_BR1_RET(op, type, arg1);	\
+		return ia64_intri_res;			\
+	}
+
+#define PARAVIRT_DEFINE_CPU_OP2(op, type)		\
+	static inline void				\
+	paravirt_ ## op (unsigned long arg1,		\
+			 unsigned long arg2)		\
+	{						\
+		PARAVIRT_BR2(op, type, arg1, arg2);	\
+	}
+
+
+PARAVIRT_DEFINE_CPU_OP1_VOID(fc, FC);
+PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD)
+PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR)
+PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR)
+PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I)
+PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I)
+PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I)
+PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE)
+
+static inline void
+paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+			unsigned long val2, unsigned long val3,
+			unsigned long val4)
+{
+	register unsigned long __val0 asm ("r8") = val0;
+	register unsigned long __val1 asm ("r9") = val1;
+	register unsigned long __val2 asm ("r10") = val2;
+	register unsigned long __val3 asm ("r11") = val3;
+	register unsigned long __val4 asm ("r14") = val4;
+
+	register unsigned long ia64_clobber0 asm ("r8");
+	register unsigned long ia64_clobber1 asm ("r9");
+	register unsigned long ia64_clobber2 asm ("r10");
+	register unsigned long ia64_clobber3 asm ("r11");
+	register unsigned long ia64_clobber4 asm ("r14");
+
+	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,
+					  PARAVIRT_TYPE(SET_RR0_TO_RR4))
+		      : "=r"(ia64_clobber0),
+			"=r"(ia64_clobber1),
+			"=r"(ia64_clobber2),
+			"=r"(ia64_clobber3),
+			"=r"(ia64_clobber4)
+		      : PARAVIRT_OP(set_rr0_to_rr4),
+			"0"(__val0), "1"(__val1), "2"(__val2),
+			"3"(__val3), "4"(__val4)
+		      : PARAVIRT_CLOBBERS5);
+}
+
+/* unsigned long paravirt_getreg(int reg) */
+#define __paravirt_getreg(reg)						\
+	({								\
+		register unsigned long ia64_intri_res asm ("r8");	\
+		register unsigned long __reg asm ("r8") = (reg);	\
+									\
+		BUILD_BUG_ON(!__builtin_constant_p(reg));		\
+		asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+						  PARAVIRT_TYPE(GETREG) \
+						  + (reg))		\
+			      : "=r"(ia64_intri_res)			\
+			      : PARAVIRT_OP(getreg), "0"(__reg)		\
+			      : PARAVIRT_CLOBBERS1);			\
+									\
+		ia64_intri_res;						\
+	})
+
+/* void paravirt_setreg(int reg, unsigned long val) */
+#define paravirt_setreg(reg, val)					\
+	do {								\
+		register unsigned long __val asm ("r8") = val;		\
+		register unsigned long __reg asm ("r9") = reg;		\
+		register unsigned long ia64_clobber1 asm ("r8");	\
+		register unsigned long ia64_clobber2 asm ("r9");	\
+									\
+		BUILD_BUG_ON(!__builtin_constant_p(reg));		\
+		asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,	\
+						  PARAVIRT_TYPE(SETREG) \
+						  + (reg))		\
+			      : "=r"(ia64_clobber1),			\
+				"=r"(ia64_clobber2)			\
+			      : PARAVIRT_OP(setreg),			\
+				"1"(__reg), "0"(__val)			\
+			      : PARAVIRT_CLOBBERS2);			\
+	} while (0)
+
+#endif /* ASM_SUPPORTED */
+#endif /* CONFIG_PARAVIRT */
+
 #endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */
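
Putting the pieces of this header together: each PARAVIRT_DEFINE_CPU_OP* invocation produces an inline function whose entire body is the five-bundle __PARAVIRT_BR trampoline, wrapped by paravirt_alt_bundle() so the site is recorded for binary patching. Roughly, PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH) expands to the following (illustrative expansion, not literal preprocessor output):

    static inline unsigned long
    paravirt_thash(unsigned long arg1)
    {
    	register unsigned long ia64_intri_res asm ("r8");
    	register unsigned long __arg1 asm ("r8") = arg1;

    	/* emits the trampoline between the 998:/999: labels and records
    	 * (998b, 999b, PARAVIRT_PATCH_TYPE_THASH) in .paravirt_bundles */
    	asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,
    					  PARAVIRT_TYPE(THASH))
    		      : "=r"(ia64_intri_res)
    		      : PARAVIRT_OP(thash), "0"(__arg1)
    		      : PARAVIRT_CLOBBERS1);
    	return ia64_intri_res;
    }
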
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index 21c4023..5984083 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -126,7 +126,8 @@
 extern int is_multithreading_enabled(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 
 #else /* CONFIG_SMP */
 
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
index 4e03cfe..86c7db8 100644
--- a/arch/ia64/include/asm/timex.h
+++ b/arch/ia64/include/asm/timex.h
@@ -40,5 +40,6 @@
 }
 
 extern void ia64_cpu_local_tick (void);
+extern unsigned long long ia64_native_sched_clock (void);
 
 #endif /* _ASM_IA64_TIMEX_H */
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index f260dcf..7b4c8c7 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -112,11 +112,6 @@
 
 extern void arch_fix_phys_package_id(int num, u32 slot);
 
-#define pcibus_to_cpumask(bus)	(pcibus_to_node(bus) == -1 ? \
-					CPU_MASK_ALL : \
-					node_to_cpumask(pcibus_to_node(bus)) \
-				)
-
 #define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
 				 cpu_all_mask :				\
 				 cpumask_of_node(pcibus_to_node(bus)))
diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h
index 7a804e8..e425227 100644
--- a/arch/ia64/include/asm/xen/hypervisor.h
+++ b/arch/ia64/include/asm/xen/hypervisor.h
@@ -33,9 +33,6 @@
 #ifndef _ASM_IA64_XEN_HYPERVISOR_H
 #define _ASM_IA64_XEN_HYPERVISOR_H
 
-#ifdef CONFIG_XEN
-
-#include <linux/init.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>	/* to compile feature.c */
 #include <xen/features.h>		/* to compile xen-netfront.c */
@@ -43,22 +40,32 @@
 
 /* xen_domain_type is set before executing any C code by early_xen_setup */
 enum xen_domain_type {
-	XEN_NATIVE,
-	XEN_PV_DOMAIN,
-	XEN_HVM_DOMAIN,
+	XEN_NATIVE,	/* running on bare hardware */
+	XEN_PV_DOMAIN,	/* running in a PV domain */
+	XEN_HVM_DOMAIN,	/* running in a Xen HVM domain */
 };
 
+#ifdef CONFIG_XEN
 extern enum xen_domain_type xen_domain_type;
+#else
+#define xen_domain_type		XEN_NATIVE
+#endif
 
 #define xen_domain()		(xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
-#define xen_initial_domain()	(xen_pv_domain() && \
+#define xen_pv_domain()		(xen_domain() &&			\
+				 xen_domain_type == XEN_PV_DOMAIN)
+#define xen_hvm_domain()	(xen_domain() &&			\
+				 xen_domain_type == XEN_HVM_DOMAIN)
+
+#ifdef CONFIG_XEN_DOM0
+#define xen_initial_domain()	(xen_pv_domain() &&			\
 				 (xen_start_info->flags & SIF_INITDOMAIN))
-#define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
+#else
+#define xen_initial_domain()	(0)
+#endif
 
-/* deprecated. remove this */
-#define is_running_on_xen()	(xen_domain_type == XEN_PV_DOMAIN)
 
+#ifdef CONFIG_XEN
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
@@ -74,16 +81,6 @@
 
 /* For setup_arch() in arch/ia64/kernel/setup.c */
 void xen_ia64_enable_opt_feature(void);
-
-#else /* CONFIG_XEN */
-
-#define xen_domain()		(0)
-#define xen_pv_domain()		(0)
-#define xen_initial_domain()	(0)
-#define xen_hvm_domain()	(0)
-#define is_running_on_xen()	(0)	/* deprecated. remove this */
 #endif
 
-#define is_initial_xendomain()	(0)	/* deprecated. remove this */
-
 #endif /* _ASM_IA64_XEN_HYPERVISOR_H */
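
With the predicates defined unconditionally, callers no longer need their own #ifdef CONFIG_XEN guards; in the !XEN case xen_domain() folds to a constant zero and the compiler discards the guest-only branch. A small, hypothetical usage sketch:

    /* Hypothetical caller: per-domain-type setup. */
    void __init platform_guest_setup(void)
    {
    	if (!xen_domain())
    		return;			/* bare metal */

    	if (xen_initial_domain()) {
    		/* dom0-only initialization would go here; this
    		 * predicate is non-zero only with CONFIG_XEN_DOM0 */
    	}
    }
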
diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
index 19c2ae1..c53a476 100644
--- a/arch/ia64/include/asm/xen/inst.h
+++ b/arch/ia64/include/asm/xen/inst.h
@@ -33,6 +33,9 @@
 #define __paravirt_work_processed_syscall_target \
 						xen_work_processed_syscall
 
+#define paravirt_fsyscall_table			xen_fsyscall_table
+#define paravirt_fsys_bubble_down		xen_fsys_bubble_down
+
 #define MOV_FROM_IFA(reg)	\
 	movl reg = XSI_IFA;	\
 	;;			\
@@ -110,6 +113,27 @@
 .endm
 #define MOV_FROM_PSR(pred, reg, clob)	__MOV_FROM_PSR pred, reg, clob
 
+/* assumes ar.itc is read with interrupts disabled. */
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)		\
+(pred)	movl clob = XSI_ITC_OFFSET;				\
+	;;							\
+(pred)	ld8 clob = [clob];					\
+(pred)	mov reg = ar.itc;					\
+	;;							\
+(pred)	add reg = reg, clob;					\
+	;;							\
+(pred)	movl clob = XSI_ITC_LAST;				\
+	;;							\
+(pred)	ld8 clob = [clob];					\
+	;;							\
+(pred)	cmp.geu.unc pred_clob, p0 = clob, reg;			\
+	;;							\
+(pred_clob)	add reg = 1, clob;				\
+	;;							\
+(pred)	movl clob = XSI_ITC_LAST;				\
+	;;							\
+(pred)	st8 [clob] = reg
+
 
 #define MOV_TO_IFA(reg, clob)	\
 	movl clob = XSI_IFA;	\
@@ -362,6 +386,10 @@
 #define RSM_PSR_DT		\
 	XEN_HYPER_RSM_PSR_DT
 
+#define RSM_PSR_BE_I(clob0, clob1)	\
+	RSM_PSR_I(p0, clob0, clob1);	\
+	rum psr.be
+
 #define SSM_PSR_DT_AND_SRLZ_I	\
 	XEN_HYPER_SSM_PSR_DT
 
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index f00fab40..e951e74 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -209,6 +209,15 @@
 			unsigned long krs[8];	/* kernel registers */
 			unsigned long tmp[16];	/* temp registers
 						   (e.g. for hyperprivops) */
+
+			/* itc paravirtualization
+			 * vAR.ITC = mAR.ITC + itc_offset
+			 * itc_last is the last value passed to
+			 * the guest OS, kept to prevent it from
+			 * going backwards.
+			 */
+			unsigned long itc_offset;
+			unsigned long itc_last;
 		};
 	};
 };
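
These two fields implement the invariant stated in the comment: the guest observes vAR.ITC = mAR.ITC + itc_offset, and itc_last remembers the last value handed out so a reread (for example after the offset changes across migration) can never appear to go backwards. In C, the MOV_FROM_ITC assembly in asm/xen/inst.h behaves roughly like this sketch (access through XEN_MAPPEDREGS is an assumption for illustration; the real code runs with interrupts disabled):

    static unsigned long xen_read_itc(void)
    {
    	unsigned long now = ia64_native_getreg(_IA64_REG_AR_ITC)
    			    + XEN_MAPPEDREGS->itc_offset;

    	if (now <= XEN_MAPPEDREGS->itc_last)	/* enforce monotonicity */
    		now = XEN_MAPPEDREGS->itc_last + 1;
    	XEN_MAPPEDREGS->itc_last = now;
    	return now;
    }
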
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index 4d92d9b..c57fa91 100644
--- a/arch/ia64/include/asm/xen/minstate.h
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -1,3 +1,12 @@
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define XEN_ACCOUNT_GET_STAMP		\
+	MOV_FROM_ITC(pUStk, p6, r20, r2);
+#else
+#define XEN_ACCOUNT_GET_STAMP
+#endif
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -123,7 +132,7 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
-	ACCOUNT_GET_STAMP									\
+	XEN_ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
diff --git a/arch/ia64/include/asm/xen/patchlist.h b/arch/ia64/include/asm/xen/patchlist.h
new file mode 100644
index 0000000..eae944e
--- /dev/null
+++ b/arch/ia64/include/asm/xen/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist		\
+	__xen_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist			\
+	__xen_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist	\
+	__xen_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist	\
+	__xen_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist			\
+	__xen_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist			\
+	__xen_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist		\
+	__xen_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist		\
+	__xen_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h
index 71ec754..fb4ec5e 100644
--- a/arch/ia64/include/asm/xen/privop.h
+++ b/arch/ia64/include/asm/xen/privop.h
@@ -55,6 +55,8 @@
 #define XSI_BANK1_R16			(XSI_BASE + XSI_BANK1_R16_OFS)
 #define XSI_BANKNUM			(XSI_BASE + XSI_BANKNUM_OFS)
 #define XSI_IHA				(XSI_BASE + XSI_IHA_OFS)
+#define XSI_ITC_OFFSET			(XSI_BASE + XSI_ITC_OFFSET_OFS)
+#define XSI_ITC_LAST			(XSI_BASE + XSI_ITC_LAST_OFS)
 #endif
 
 #ifndef __ASSEMBLY__
@@ -67,7 +69,7 @@
  *  may have different semantics depending on whether they are executed
  *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
  *  be allowed to execute directly, lest incorrect semantics result. */
-extern void xen_fc(unsigned long addr);
+extern void xen_fc(void *addr);
 extern unsigned long xen_thash(unsigned long addr);
 
 /* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
@@ -80,8 +82,10 @@
 extern unsigned long xen_get_cpuid(int index);
 extern unsigned long xen_get_pmd(int index);
 
+#ifndef ASM_SUPPORTED
 extern unsigned long xen_get_eflag(void);	/* see xen_ia64_getreg */
 extern void xen_set_eflag(unsigned long);	/* see xen_ia64_setreg */
+#endif
 
 /************************************************/
 /* Instructions paravirtualized for performance */
@@ -106,6 +110,7 @@
 #define xen_get_virtual_pend()		\
 	(*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1))
 
+#ifndef ASM_SUPPORTED
 /* Although all privileged operations can be left to trap and will
  * be properly handled by Xen, some are frequent enough that we use
  * hyperprivops for performance. */
@@ -123,6 +128,7 @@
 			       unsigned long val4);
 extern void xen_set_kr(unsigned long index, unsigned long val);
 extern void xen_ptcga(unsigned long addr, unsigned long size);
+#endif /* !ASM_SUPPORTED */
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index f2778f2..5628e9a 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
-	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
+	 irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o		\
 	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
 	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o
 
@@ -36,7 +36,8 @@
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
 
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o \
+				   paravirt_patch.o
 
 obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
@@ -45,35 +46,13 @@
 obj-$(CONFIG_DMAR)		+= pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
-
-extra-y += gate.so gate-syms.o gate.lds gate.o
-
 # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
 CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
-
-quiet_cmd_gate = GATE $@
-      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
-		     $(call ld-option, -Wl$(comma)--hash-style=sysv)
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
-
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
-
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
-
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# mark the gate page as compiled for the native flavor
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
 
 # Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
 define sed-y
@@ -109,9 +88,9 @@
 clean-files += $(objtree)/include/asm-ia64/nr-irqs.h
 
 #
-# native ivt.S and entry.S
+# native ivt.S, entry.S and fsys.S
 #
-ASM_PARAVIRT_OBJS = ivt.o entry.o
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
 define paravirtualized_native
 AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
 AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
new file mode 100644
index 0000000..1d87f84
--- /dev/null
+++ b/arch/ia64/kernel/Makefile.gate
@@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+		     $(call ld-option, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data.gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index bdef2ce..5510317 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -890,7 +890,7 @@
 		possible, max((possible - available_cpus), 0));
 
 	for (i = 0; i < possible; i++)
-		cpu_set(i, cpu_possible_map);
+		set_cpu_possible(i, true);
 }
 
 int acpi_map_lsapic(acpi_handle handle, int *pcpu)
@@ -928,9 +928,9 @@
 	buffer.length = ACPI_ALLOCATE_BUFFER;
 	buffer.pointer = NULL;
 
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-	if (cpu >= NR_CPUS)
+	cpumask_complement(&tmp_map, cpu_present_mask);
+	cpu = cpumask_first(&tmp_map);
+	if (cpu >= nr_cpu_ids)
 		return -EINVAL;
 
 	acpi_map_cpu2node(handle, cpu, physid);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 742dbb1..af56501 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -316,5 +316,7 @@
 	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
 	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
 	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
+	DEFINE_MAPPED_REG_OFS(XSI_ITC_OFFSET_OFS, itc_offset);
+	DEFINE_MAPPED_REG_OFS(XSI_ITC_LAST_OFS, itc_last);
 #endif /* CONFIG_XEN */
 }
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index efaff15..7ef80e8 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -456,6 +456,7 @@
 		 GRANULEROUNDDOWN((unsigned long) pal_vaddr),
 		 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
 		 IA64_GRANULE_SHIFT);
+	paravirt_dv_serialize_data();
 	ia64_set_psr(psr);		/* restore psr */
 }
 
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2..ccfdeee 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -735,7 +735,7 @@
 __paravirt_work_processed_syscall:
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	adds r2=PT(LOADRS)+16,r12
-(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
 	;;
 (p6)	ld4 r31=[r18]				// load current_thread_info()->flags
@@ -984,7 +984,7 @@
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
-(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
 	nop.i 0
 	;;
 #else
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index c1625c7..3567d54 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -25,6 +25,7 @@
 #include <asm/unistd.h>
 
 #include "entry.h"
+#include "paravirt_inst.h"
 
 /*
  * See Documentation/ia64/fsys.txt for details on fsyscalls.
@@ -279,7 +280,7 @@
 (p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
 	;;
 	.pred.rel.mutex p8,p9
-(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
+	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
 (p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
 (p13)	ld8 r25 = [r19]		// get itc_lastcycle value
 	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
@@ -418,7 +419,7 @@
 	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
 	;;
 
-	rsm psr.i				// mask interrupt delivery
+	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
 	mov ar.ccv=0
 	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
 
@@ -491,7 +492,7 @@
 #ifdef CONFIG_SMP
 	st4.rel [r31]=r0			// release the lock
 #endif
-	ssm psr.i
+	SSM_PSR_I(p0, p9, r31)
 	;;
 
 	srlz.d					// ensure psr.i is set again
@@ -513,7 +514,7 @@
 #ifdef CONFIG_SMP
 	st4.rel [r31]=r0			// release the lock
 #endif
-	ssm psr.i
+	SSM_PSR_I(p0, p9, r17)
 	;;
 	srlz.d
 	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall
@@ -521,7 +522,7 @@
 #ifdef CONFIG_SMP
 .lock_contention:
 	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
-	ssm psr.i
+	SSM_PSR_I(p0, p9, r17)
 	;;
 	srlz.d
 	br.sptk.many fsys_fallback_syscall
@@ -592,17 +593,17 @@
 	adds r17=-1024,r15
 	movl r14=sys_call_table
 	;;
-	rsm psr.i
+	RSM_PSR_I(p0, r26, r27)
 	shladd r18=r17,3,r14
 	;;
 	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
-	mov r29=psr				// read psr (12 cyc load latency)
+	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
 	mov r27=ar.rsc
 	mov r21=ar.fpsr
 	mov r26=ar.pfs
 END(fsys_fallback_syscall)
 	/* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	.prologue
 	.altrp b6
 	.body
@@ -640,7 +641,7 @@
 	 *
 	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
 	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
-	 * PSR.I  : already turned off by the time fsys_bubble_down gets
+	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
 	 *	    invoked
 	 * PSR.DFL: always 0 (kernel never turns it on)
 	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
@@ -650,7 +651,7 @@
 	 * PSR.DB : don't care --- kernel never enables kernel-level
 	 *	    breakpoints
 	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
-	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
 	 *          will trigger a taken branch; the taken-trap-handler then
 	 *          converts the syscall into a break-based system-call.
 	 */
@@ -683,7 +684,7 @@
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-	mov.m r30=ar.itc			// M    get cycle for accounting
+	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
 #else
 	nop.m 0
 #endif
@@ -734,21 +735,21 @@
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
 	;;
-	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
+	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
 	cmp.eq p8,p0=r3,r0			// A
 (p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
 
 	nop.m 0
 (p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
 	br.cond.spnt ia64_trace_syscall		// B
-END(fsys_bubble_down)
+END(paravirt_fsys_bubble_down)
 
 	.rodata
 	.align 8
-	.globl fsyscall_table
+	.globl paravirt_fsyscall_table
 
-	data8 fsys_bubble_down
-fsyscall_table:
+	data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
 	data8 fsys_ni_syscall
 	data8 0				// exit			// 1025
 	data8 0				// read
@@ -1033,4 +1034,4 @@
 
 	// fill in zeros for the remaining entries
 	.zero:
-	.space fsyscall_table + 8*NR_syscalls - .zero, 0
+	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index 74b1ccc..cf5e0a1 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -13,6 +13,7 @@
 #include <asm/sigcontext.h>
 #include <asm/system.h>
 #include <asm/unistd.h>
+#include "paravirt_inst.h"
 
 /*
  * We can't easily refer to symbols inside the kernel.  To avoid full runtime relocation,
@@ -48,87 +49,6 @@
 }
 END(__kernel_syscall_via_break)
 
-/*
- * On entry:
- *	r11 = saved ar.pfs
- *	r15 = system call #
- *	b0  = saved return address
- *	b6  = return address
- * On exit:
- *	r11 = saved ar.pfs
- *	r15 = system call #
- *	b0  = saved return address
- *	all other "scratch" registers:	undefined
- *	all "preserved" registers:	same as on entry
- */
-
-GLOBAL_ENTRY(__kernel_syscall_via_epc)
-	.prologue
-	.altrp b6
-	.body
-{
-	/*
-	 * Note: the kernel cannot assume that the first two instructions in this
-	 * bundle get executed.  The remaining code must be safe even if
-	 * they do not get executed.
-	 */
-	adds r17=-1024,r15			// A
-	mov r10=0				// A    default to successful syscall execution
-	epc					// B	causes split-issue
-}
-	;;
-	rsm psr.be | psr.i			// M2 (5 cyc to srlz.d)
-	LOAD_FSYSCALL_TABLE(r14)		// X
-	;;
-	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
-	shladd r18=r17,3,r14			// A
-	mov r19=NR_syscalls-1			// A
-	;;
-	lfetch [r18]				// M0|1
-	mov r29=psr				// M2 (12 cyc)
-	// If r17 is a NaT, p6 will be zero
-	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
-	;;
-	mov r21=ar.fpsr				// M2 (12 cyc)
-	tnat.nz p10,p9=r15			// I0
-	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
-	;;
-	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
-(p6)	ld8 r18=[r18]				// M0|1
-	nop.i 0
-	;;
-	nop.m 0
-(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
-	nop.i 0
-	;;
-(p8)	ssm psr.i
-(p6)	mov b7=r18				// I0
-(p8)	br.dptk.many b7				// B
-
-	mov r27=ar.rsc				// M2 (12 cyc)
-/*
- * brl.cond doesn't work as intended because the linker would convert this branch
- * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
- * future version of the linker.  In the meantime, we just use an indirect branch
- * instead.
- */
-#ifdef CONFIG_ITANIUM
-(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
-	;;
-(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
-	;;
-(p6)	mov b7=r14
-(p6)	br.sptk.many b7
-#else
-	BRL_COND_FSYS_BUBBLE_DOWN(p6)
-#endif
-	ssm psr.i
-	mov r10=-1
-(p10)	mov r8=EINVAL
-(p9)	mov r8=ENOSYS
-	FSYS_RETURN
-END(__kernel_syscall_via_epc)
-
 #	define ARG0_OFF		(16 + IA64_SIGFRAME_ARG0_OFFSET)
 #	define ARG1_OFF		(16 + IA64_SIGFRAME_ARG1_OFFSET)
 #	define ARG2_OFF		(16 + IA64_SIGFRAME_ARG2_OFFSET)
@@ -374,3 +294,92 @@
 	// invala not necessary as that will happen when returning to user-mode
 	br.cond.sptk back_from_restore_rbs
 END(__kernel_sigtramp)
+
+/*
+ * On entry:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	b6  = return address
+ * On exit:
+ *	r11 = saved ar.pfs
+ *	r15 = system call #
+ *	b0  = saved return address
+ *	all other "scratch" registers:	undefined
+ *	all "preserved" registers:	same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+	.prologue
+	.altrp b6
+	.body
+{
+	/*
+	 * Note: the kernel cannot assume that the first two instructions in this
+	 * bundle get executed.  The remaining code must be safe even if
+	 * they do not get executed.
+	 */
+	adds r17=-1024,r15			// A
+	mov r10=0				// A    default to successful syscall execution
+	epc					// B	causes split-issue
+}
+	;;
+	RSM_PSR_BE_I(r20, r22)			// M2 (5 cyc to srlz.d)
+	LOAD_FSYSCALL_TABLE(r14)		// X
+	;;
+	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
+	shladd r18=r17,3,r14			// A
+	mov r19=NR_syscalls-1			// A
+	;;
+	lfetch [r18]				// M0|1
+	MOV_FROM_PSR(p0, r29, r8)		// M2 (12 cyc)
+	// If r17 is a NaT, p6 will be zero
+	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+	;;
+	mov r21=ar.fpsr				// M2 (12 cyc)
+	tnat.nz p10,p9=r15			// I0
+	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
+	;;
+	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
+(p6)	ld8 r18=[r18]				// M0|1
+	nop.i 0
+	;;
+	nop.m 0
+(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
+	nop.i 0
+	;;
+	SSM_PSR_I(p8, p14, r25)
+(p6)	mov b7=r18				// I0
+(p8)	br.dptk.many b7				// B
+
+	mov r27=ar.rsc				// M2 (12 cyc)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
+ * future version of the linker.  In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
+	;;
+(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
+	;;
+(p6)	mov b7=r14
+(p6)	br.sptk.many b7
+#else
+	BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+	SSM_PSR_I(p0, p14, r10)
+	mov r10=-1
+(p10)	mov r8=EINVAL
+(p9)	mov r8=ENOSYS
+	FSYS_RETURN
+
+#ifdef CONFIG_PARAVIRT
+	/*
+	 * pad to make the size of this symbol constant
+	 * independent of paravirtualization.
+	 */
+	.align PAGE_SIZE / 8
+#endif
+END(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index 3cb1abc..88c64ed 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -7,6 +7,7 @@
 
 
 #include <asm/system.h>
+#include "paravirt_patchlist.h"
 
 SECTIONS
 {
@@ -33,21 +34,21 @@
 	. = GATE_ADDR + 0x600;
 
 	.data.patch		: {
-		__start_gate_mckinley_e9_patchlist = .;
+		__paravirt_start_gate_mckinley_e9_patchlist = .;
 		*(.data.patch.mckinley_e9)
-		__end_gate_mckinley_e9_patchlist = .;
+		__paravirt_end_gate_mckinley_e9_patchlist = .;
 
-		__start_gate_vtop_patchlist = .;
+		__paravirt_start_gate_vtop_patchlist = .;
 		*(.data.patch.vtop)
-		__end_gate_vtop_patchlist = .;
+		__paravirt_end_gate_vtop_patchlist = .;
 
-		__start_gate_fsyscall_patchlist = .;
+		__paravirt_start_gate_fsyscall_patchlist = .;
 		*(.data.patch.fsyscall_table)
-		__end_gate_fsyscall_patchlist = .;
+		__paravirt_end_gate_fsyscall_patchlist = .;
 
-		__start_gate_brl_fsys_bubble_down_patchlist = .;
+		__paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
 		*(.data.patch.brl_fsys_bubble_down)
-		__end_gate_brl_fsys_bubble_down_patchlist = .;
+		__paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
 	}						:readable
 
 	.IA_64.unwind_info	: { *(.IA_64.unwind_info*) }
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 59301c4..23f846d 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1050,7 +1050,7 @@
  * except that the multiplication and the shift are done with 128-bit
  * intermediate precision so that we can produce a full 64-bit result.
  */
-GLOBAL_ENTRY(sched_clock)
+GLOBAL_ENTRY(ia64_native_sched_clock)
 	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
 	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
 	;;
@@ -1066,7 +1066,13 @@
 	;;
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
-END(sched_clock)
+END(ia64_native_sched_clock)
+#ifndef CONFIG_PARAVIRT
+	//unsigned long long
+	//sched_clock(void) __attribute__((alias("ia64_native_sched_clock")));
+	.global sched_clock
+sched_clock = ia64_native_sched_clock
+#endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 GLOBAL_ENTRY(cycle_to_cputime)
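
The assembler-level alias above covers the !CONFIG_PARAVIRT case only; with paravirtualization enabled, sched_clock must instead be a C function that dispatches through pv_time_ops. A hedged sketch of that counterpart (the actual definition belongs in the ia64 time code, not this file):

    /* Sketch: overrides the weak generic sched_clock() when
     * CONFIG_PARAVIRT is set. */
    unsigned long long sched_clock(void)
    {
    	return paravirt_sched_clock();
    }
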
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index f675d8e3..ec9a5fd 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -804,7 +804,7 @@
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-	mov.m r30=ar.itc			// M    get cycle for accounting
+	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
 #else
 	mov b6=r30				// I0   setup syscall handler branch reg early
 #endif
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index bab1de2..8f33a88 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1456,9 +1456,9 @@
 
 	ia64_mca_cmc_int_handler(cmc_irq, arg);
 
-	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+	cpuid = cpumask_next(cpuid, cpu_online_mask);
 
-	if (cpuid < NR_CPUS) {
+	if (cpuid < nr_cpu_ids) {
 		platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
 	} else {
 		/* If no log record, switch out of polling mode */
@@ -1525,7 +1525,7 @@
 
 	ia64_mca_cpe_int_handler(cpe_irq, arg);
 
-	for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+	cpuid = cpumask_next(cpuid, cpu_online_mask);
 
-	if (cpuid < NR_CPUS) {
+	if (cpuid < nr_cpu_ids) {
 		platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index aaa7d90..da3b0cf 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -446,6 +446,14 @@
 			mod->arch.opd = s;
 		else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
 			mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+		else if (strcmp(".paravirt_bundles",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_bundles = s;
+		else if (strcmp(".paravirt_insts",
+				secstrings + s->sh_name) == 0)
+			mod->arch.paravirt_insts = s;
+#endif
 
 	if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
 		printk(KERN_ERR "%s: sections missing\n", mod->name);
@@ -525,8 +533,7 @@
 			goto found;
 
 	/* Not enough GOT entries? */
-	if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size))
-		BUG();
+	BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
 
 	e->val = value;
 	++mod->arch.next_got_entry;
@@ -921,6 +928,30 @@
 	DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
 	if (mod->arch.unwind)
 		register_unwind_table(mod);
+#ifdef CONFIG_PARAVIRT
+	if (mod->arch.paravirt_bundles) {
+		struct paravirt_patch_site_bundle *start =
+			(struct paravirt_patch_site_bundle *)
+			mod->arch.paravirt_bundles->sh_addr;
+		struct paravirt_patch_site_bundle *end =
+			(struct paravirt_patch_site_bundle *)
+			(mod->arch.paravirt_bundles->sh_addr +
+			 mod->arch.paravirt_bundles->sh_size);
+
+		paravirt_patch_apply_bundle(start, end);
+	}
+	if (mod->arch.paravirt_insts) {
+		struct paravirt_patch_site_inst *start =
+			(struct paravirt_patch_site_inst *)
+			mod->arch.paravirt_insts->sh_addr;
+		struct paravirt_patch_site_inst *end =
+			(struct paravirt_patch_site_inst *)
+			(mod->arch.paravirt_insts->sh_addr +
+			 mod->arch.paravirt_insts->sh_size);
+
+		paravirt_patch_apply_inst(start, end);
+	}
+#endif
 	return 0;
 }
 
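Annotation: the entries walked by the two loops above are plain triples, and sh_addr/sh_size bracket an array of them. Consistent with how the apply loops in paravirt_patch.c below dereference p->sbundle, p->ebundle and p->type, the expected layout is:

struct paravirt_patch_site_bundle {
	void		*sbundle;	/* first bundle of the patch site */
	void		*ebundle;	/* end of the site (exclusive) */
	unsigned long	type;		/* PARAVIRT_PATCH_TYPE_* selector */
};

so the start/end casts simply mark the first entry and one past the last.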
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 9f14c16..a21d7bb 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -46,13 +46,23 @@
  * initialization hooks.
  */
 
-struct pv_init_ops pv_init_ops;
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+	.patch_bundle = ia64_native_patch_bundle,
+#endif
+	.patch_branch = ia64_native_patch_branch,
+};
 
 /***************************************************************************
  * pv_cpu_ops
  * intrinsics hooks.
  */
 
+#ifndef ASM_SUPPORTED
 /* ia64_native_xxx are macros so that we have to make them real functions */
 
 #define DEFINE_VOID_FUNC1(name)					\
@@ -60,7 +70,14 @@
 	ia64_native_ ## name ## _func(unsigned long arg)	\
 	{							\
 		ia64_native_ ## name(arg);			\
-	}							\
+	}
+
+#define DEFINE_VOID_FUNC1_VOID(name)				\
+	static void						\
+	ia64_native_ ## name ## _func(void *arg)		\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
 
 #define DEFINE_VOID_FUNC2(name)					\
 	static void						\
@@ -68,7 +85,7 @@
 				      unsigned long arg1)	\
 	{							\
 		ia64_native_ ## name(arg0, arg1);		\
-	}							\
+	}
 
 #define DEFINE_FUNC0(name)			\
 	static unsigned long			\
@@ -84,7 +101,7 @@
 		return ia64_native_ ## name(arg);	\
 	}						\
 
-DEFINE_VOID_FUNC1(fc);
+DEFINE_VOID_FUNC1_VOID(fc);
 DEFINE_VOID_FUNC1(intrin_local_irq_restore);
 
 DEFINE_VOID_FUNC2(ptcga);
@@ -274,6 +291,266 @@
 		break;
 	}
 }
+#else
+
+#define __DEFINE_FUNC(name, code)					\
+	extern const char ia64_native_ ## name ## _direct_start[];	\
+	extern const char ia64_native_ ## name ## _direct_end[];	\
+	asm (".align 32\n"						\
+	     ".proc ia64_native_" #name "_func\n"			\
+	     "ia64_native_" #name "_func:\n"				\
+	     "ia64_native_" #name "_direct_start:\n"			\
+	     code							\
+	     "ia64_native_" #name "_direct_end:\n"			\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp ia64_native_" #name "_func\n")
+
+#define DEFINE_VOID_FUNC0(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)			\
+	extern void						\
+	ia64_native_ ## name ## _func(void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	ia64_native_ ## name ## _func(void);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code)			\
+	extern unsigned long				\
+	ia64_native_ ## name ## _func(type arg);	\
+	__DEFINE_FUNC(name, code)
+
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "fc r8\n");
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  ";;\n"
+		  "     cmp.ne p6, p7 = r8, r0\n"
+		  ";;\n"
+		  "(p6) ssm psr.i\n"
+		  "(p7) rsm psr.i\n"
+		  ";;\n"
+		  "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+	     "mov r8 = psr\n"
+	     ";;\n"
+	     "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+		  "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+		  "rsm psr.i\n");
+
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "mov rr[r0] = r8\n"
+	      "movl r2 = 0x2000000000000000\n"
+	      ";;\n"
+	      "mov rr[r2] = r9\n"
+	      "shl r3 = r2, 1\n"	/* movl r3 = 0x4000000000000000 */
+	      ";;\n"
+	      "add r2 = r2, r3\n"	/* movl r2 = 0x6000000000000000 */
+	      "mov rr[r3] = r10\n"
+	      ";;\n"
+	      "mov rr[r2] = r11\n"
+	      "shl r3 = r3, 1\n"	/* movl r3 = 0x8000000000000000 */
+	      ";;\n"
+	      "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+#define __DEFINE_GET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r8\n"			\
+	";;\n"						\
+	"(p6) mov r8 = " #reg "\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_GET_AR(id, reg)	__DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg)	__DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(GP, gp)
+	      /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+	      __DEFINE_GET_REG(PSR, psr)
+	      __DEFINE_GET_REG(TP, tp)
+	      __DEFINE_GET_REG(SP, sp)
+
+	      __DEFINE_GET_REG(AR_KR0, ar0)
+	      __DEFINE_GET_REG(AR_KR1, ar1)
+	      __DEFINE_GET_REG(AR_KR2, ar2)
+	      __DEFINE_GET_REG(AR_KR3, ar3)
+	      __DEFINE_GET_REG(AR_KR4, ar4)
+	      __DEFINE_GET_REG(AR_KR5, ar5)
+	      __DEFINE_GET_REG(AR_KR6, ar6)
+	      __DEFINE_GET_REG(AR_KR7, ar7)
+	      __DEFINE_GET_AR(RSC, rsc)
+	      __DEFINE_GET_AR(BSP, bsp)
+	      __DEFINE_GET_AR(BSPSTORE, bspstore)
+	      __DEFINE_GET_AR(RNAT, rnat)
+	      __DEFINE_GET_AR(FCR, fcr)
+	      __DEFINE_GET_AR(EFLAG, eflag)
+	      __DEFINE_GET_AR(CSD, csd)
+	      __DEFINE_GET_AR(SSD, ssd)
+	      __DEFINE_GET_REG(AR_CFLAG, ar27)
+	      __DEFINE_GET_AR(FSR, fsr)
+	      __DEFINE_GET_AR(FIR, fir)
+	      __DEFINE_GET_AR(FDR, fdr)
+	      __DEFINE_GET_AR(CCV, ccv)
+	      __DEFINE_GET_AR(UNAT, unat)
+	      __DEFINE_GET_AR(FPSR, fpsr)
+	      __DEFINE_GET_AR(ITC, itc)
+	      __DEFINE_GET_AR(PFS, pfs)
+	      __DEFINE_GET_AR(LC, lc)
+	      __DEFINE_GET_AR(EC, ec)
+
+	      __DEFINE_GET_CR(DCR, dcr)
+	      __DEFINE_GET_CR(ITM, itm)
+	      __DEFINE_GET_CR(IVA, iva)
+	      __DEFINE_GET_CR(PTA, pta)
+	      __DEFINE_GET_CR(IPSR, ipsr)
+	      __DEFINE_GET_CR(ISR, isr)
+	      __DEFINE_GET_CR(IIP, iip)
+	      __DEFINE_GET_CR(IFA, ifa)
+	      __DEFINE_GET_CR(ITIR, itir)
+	      __DEFINE_GET_CR(IIPA, iipa)
+	      __DEFINE_GET_CR(IFS, ifs)
+	      __DEFINE_GET_CR(IIM, iim)
+	      __DEFINE_GET_CR(IHA, iha)
+	      __DEFINE_GET_CR(LID, lid)
+	      __DEFINE_GET_CR(IVR, ivr)
+	      __DEFINE_GET_CR(TPR, tpr)
+	      __DEFINE_GET_CR(EOI, eoi)
+	      __DEFINE_GET_CR(IRR0, irr0)
+	      __DEFINE_GET_CR(IRR1, irr1)
+	      __DEFINE_GET_CR(IRR2, irr2)
+	      __DEFINE_GET_CR(IRR3, irr3)
+	      __DEFINE_GET_CR(ITV, itv)
+	      __DEFINE_GET_CR(PMV, pmv)
+	      __DEFINE_GET_CR(CMCV, cmcv)
+	      __DEFINE_GET_CR(LRR0, lrr0)
+	      __DEFINE_GET_CR(LRR1, lrr1)
+
+	      "mov r8 = -1\n"	/* unsupported case */
+	);
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+#define __DEFINE_SET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r9\n"			\
+	";;\n"						\
+	"(p6) mov " #reg " = r8\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_SET_AR(id, reg)	__DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg)	__DEFINE_SET_REG(CR_ ## id, cr.reg)
+__DEFINE_FUNC(setreg,
+	      "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r9\n"
+	      ";;\n"
+	      "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      "(p6) br.cond.sptk.many b6\n"
+	      __DEFINE_SET_REG(GP, gp)
+	      __DEFINE_SET_REG(SP, sp)
+
+	      __DEFINE_SET_REG(AR_KR0, ar0)
+	      __DEFINE_SET_REG(AR_KR1, ar1)
+	      __DEFINE_SET_REG(AR_KR2, ar2)
+	      __DEFINE_SET_REG(AR_KR3, ar3)
+	      __DEFINE_SET_REG(AR_KR4, ar4)
+	      __DEFINE_SET_REG(AR_KR5, ar5)
+	      __DEFINE_SET_REG(AR_KR6, ar6)
+	      __DEFINE_SET_REG(AR_KR7, ar7)
+	      __DEFINE_SET_AR(RSC, rsc)
+	      __DEFINE_SET_AR(BSP, bsp)
+	      __DEFINE_SET_AR(BSPSTORE, bspstore)
+	      __DEFINE_SET_AR(RNAT, rnat)
+	      __DEFINE_SET_AR(FCR, fcr)
+	      __DEFINE_SET_AR(EFLAG, eflag)
+	      __DEFINE_SET_AR(CSD, csd)
+	      __DEFINE_SET_AR(SSD, ssd)
+	      __DEFINE_SET_REG(AR_CFLAG, ar27)
+	      __DEFINE_SET_AR(FSR, fsr)
+	      __DEFINE_SET_AR(FIR, fir)
+	      __DEFINE_SET_AR(FDR, fdr)
+	      __DEFINE_SET_AR(CCV, ccv)
+	      __DEFINE_SET_AR(UNAT, unat)
+	      __DEFINE_SET_AR(FPSR, fpsr)
+	      __DEFINE_SET_AR(ITC, itc)
+	      __DEFINE_SET_AR(PFS, pfs)
+	      __DEFINE_SET_AR(LC, lc)
+	      __DEFINE_SET_AR(EC, ec)
+
+	      __DEFINE_SET_CR(DCR, dcr)
+	      __DEFINE_SET_CR(ITM, itm)
+	      __DEFINE_SET_CR(IVA, iva)
+	      __DEFINE_SET_CR(PTA, pta)
+	      __DEFINE_SET_CR(IPSR, ipsr)
+	      __DEFINE_SET_CR(ISR, isr)
+	      __DEFINE_SET_CR(IIP, iip)
+	      __DEFINE_SET_CR(IFA, ifa)
+	      __DEFINE_SET_CR(ITIR, itir)
+	      __DEFINE_SET_CR(IIPA, iipa)
+	      __DEFINE_SET_CR(IFS, ifs)
+	      __DEFINE_SET_CR(IIM, iim)
+	      __DEFINE_SET_CR(IHA, iha)
+	      __DEFINE_SET_CR(LID, lid)
+	      __DEFINE_SET_CR(IVR, ivr)
+	      __DEFINE_SET_CR(TPR, tpr)
+	      __DEFINE_SET_CR(EOI, eoi)
+	      __DEFINE_SET_CR(IRR0, irr0)
+	      __DEFINE_SET_CR(IRR1, irr1)
+	      __DEFINE_SET_CR(IRR2, irr2)
+	      __DEFINE_SET_CR(IRR3, irr3)
+	      __DEFINE_SET_CR(ITV, itv)
+	      __DEFINE_SET_CR(PMV, pmv)
+	      __DEFINE_SET_CR(CMCV, cmcv)
+	      __DEFINE_SET_CR(LRR0, lrr0)
+	      __DEFINE_SET_CR(LRR1, lrr1)
+	);
+#endif
 
 struct pv_cpu_ops pv_cpu_ops = {
 	.fc		= ia64_native_fc_func,
@@ -366,4 +643,258 @@
 
 struct pv_time_ops pv_time_ops = {
 	.do_steal_accounting = ia64_native_do_steal_accounting,
+	.sched_clock = ia64_native_sched_clock,
 };
+
+/***************************************************************************
+ * binary patching
+ * pv_init_ops.patch_bundle
+ */
+
+#ifdef ASM_SUPPORTED
+#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg)	\
+	__DEFINE_FUNC(get_ ## name,			\
+		      ";;\n"				\
+		      "mov r8 = " #reg "\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)	\
+	__DEFINE_FUNC(set_ ## name,			\
+		      ";;\n"				\
+		      "mov " #reg " = r8\n"		\
+		      ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg)		\
+	IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg);	\
+	IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg)			\
+	IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
+
+
+IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
+IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
+
+/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */
+__DEFINE_FUNC(set_psr_l,
+	      ";;\n"
+	      "mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      ";;\n");
+
+IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
+IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
+
+IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
+IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
+IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
+IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
+IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
+IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
+IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
+IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
+
+IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
+IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
+IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
+IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
+IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
+IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
+IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
+IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
+IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
+IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
+IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
+IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
+IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
+IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
+IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
+IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
+IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
+IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
+IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
+
+IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
+IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
+IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
+IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
+IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
+IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
+IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
+IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
+IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
+IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
+IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
+IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
+IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
+IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
+IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
+IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
+IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
+IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
+IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
+IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
+IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
+IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
+IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
+
+static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type)		\
+	{							\
+		(void*)ia64_native_ ## name ## _direct_start,	\
+		(void*)ia64_native_ ## name ## _direct_end,	\
+		PARAVIRT_PATCH_TYPE_ ## type,			\
+	}
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
+				      INTRIN_LOCAL_IRQ_RESTORE),
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_get_ ## name ## _direct_start,	\
+		(void*)ia64_native_get_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
+	{								\
+		(void*)ia64_native_set_ ## name ## _direct_start,	\
+		(void*)ia64_native_set_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg,		\
+	}
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg),	\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg)		\
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),
+
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
+	IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
+};
+
+unsigned long __init_or_module
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+	const unsigned long nelems = ARRAY_SIZE(ia64_native_patch_bundle_elems);
+
+	return __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+					      ia64_native_patch_bundle_elems,
+					      nelems, NULL);
+}
+#endif /* ASM_SUPPORTED */
+
+extern const char ia64_native_switch_to[];
+extern const char ia64_native_leave_syscall[];
+extern const char ia64_native_work_processed_syscall[];
+extern const char ia64_native_leave_kernel[];
+
+const struct paravirt_patch_branch_target ia64_native_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type)			\
+	{						\
+		ia64_native_ ## name,			\
+		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
+	}
+	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type)
+{
+	const unsigned long nelem = ARRAY_SIZE(ia64_native_branch_target);
+	__paravirt_patch_apply_branch(tag, type,
+				      ia64_native_branch_target, nelem);
+}
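Annotation: to make the ASM_SUPPORTED half above concrete, every DEFINE_* invocation expands to an extern declaration plus a top-level asm block whose _direct_start/_direct_end labels bracket the instructions the bundle patcher may copy over a call site. Roughly, DEFINE_FUNC0(get_psr_i, ...) expands to the sketch below:

extern unsigned long ia64_native_get_psr_i_func(void);
extern const char ia64_native_get_psr_i_direct_start[];
extern const char ia64_native_get_psr_i_direct_end[];
asm (".align 32\n"
     ".proc ia64_native_get_psr_i_func\n"
     "ia64_native_get_psr_i_func:\n"
     "ia64_native_get_psr_i_direct_start:\n"
     "mov r2 = 1 << 14\n"	/* __stringify(1 << IA64_PSR_I_BIT) */
     "mov r8 = psr\n"
     ";;\n"
     "and r8 = r2, r8\n"
     "ia64_native_get_psr_i_direct_end:\n"
     "br.cond.sptk.many b6\n"
     ".endp ia64_native_get_psr_i_func\n");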
diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c
new file mode 100644
index 0000000..bfdfef1
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patch.c
@@ -0,0 +1,514 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirt_patch.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/intrinsics.h>
+#include <asm/kprobes.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+
+typedef union ia64_inst {
+	struct {
+		unsigned long long qp : 6;
+		unsigned long long : 31;
+		unsigned long long opcode : 4;
+		unsigned long long reserved : 23;
+	} generic;
+	unsigned long long l;
+} ia64_inst_t;
+
+/*
+ * flush_icache_range() can't be used here: we run before cpu_init(),
+ * which initializes ia64_i_cache_stride_shift, and flush_icache_range()
+ * depends on it.
+ */
+void __init_or_module
+paravirt_flush_i_cache_range(const void *instr, unsigned long size)
+{
+	extern void paravirt_fc_i(const void *addr);
+	unsigned long i;
+
+	for (i = 0; i < size; i += sizeof(bundle_t))
+		paravirt_fc_i(instr + i);
+}
+
+bundle_t* __init_or_module
+paravirt_get_bundle(unsigned long tag)
+{
+	return (bundle_t *)(tag & ~3UL);
+}
+
+unsigned long __init_or_module
+paravirt_get_slot(unsigned long tag)
+{
+	return tag & 3UL;
+}
+
+unsigned long __init_or_module
+paravirt_get_num_inst(unsigned long stag, unsigned long etag)
+{
+	bundle_t *sbundle = paravirt_get_bundle(stag);
+	unsigned long sslot = paravirt_get_slot(stag);
+	bundle_t *ebundle = paravirt_get_bundle(etag);
+	unsigned long eslot = paravirt_get_slot(etag);
+
+	return (ebundle - sbundle) * 3 + eslot - sslot + 1;
+}
+
+unsigned long __init_or_module
+paravirt_get_next_tag(unsigned long tag)
+{
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+	case 1:
+		return tag + 1;
+	case 2: {
+		bundle_t *bundle = paravirt_get_bundle(tag);
+		return (unsigned long)(bundle + 1);
+	}
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot0(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot0;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot1(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad0.slot1_p0 |
+		((unsigned long long)bundle->quad1.slot1_p1 << 18UL);
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot2(const bundle_t *bundle)
+{
+	ia64_inst_t inst;
+	inst.l = bundle->quad1.slot2;
+	return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_inst(unsigned long tag)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		return paravirt_read_slot0(bundle);
+	case 1:
+		return paravirt_read_slot1(bundle);
+	case 2:
+		return paravirt_read_slot2(bundle);
+	default:
+		BUG();
+	}
+	/* NOTREACHED */
+}
+
+void __init_or_module
+paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot0 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad0.slot1_p0 = inst.l;
+	bundle->quad1.slot1_p1 = inst.l >> 18UL;
+}
+
+void __init_or_module
+paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
+{
+	bundle->quad1.slot2 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	switch (slot) {
+	case 0:
+		paravirt_write_slot0(bundle, inst);
+		break;
+	case 1:
+		paravirt_write_slot1(bundle, inst);
+		break;
+	case 2:
+		paravirt_write_slot2(bundle, inst);
+		break;
+	default:
+		BUG();
+		break;
+	}
+	paravirt_flush_i_cache_range(bundle, sizeof(*bundle));
+}
+
+/* for debug */
+void
+paravirt_print_bundle(const bundle_t *bundle)
+{
+	const unsigned long *quad = (const unsigned long *)bundle;
+	ia64_inst_t slot0 = paravirt_read_slot0(bundle);
+	ia64_inst_t slot1 = paravirt_read_slot1(bundle);
+	ia64_inst_t slot2 = paravirt_read_slot2(bundle);
+
+	printk(KERN_DEBUG
+	       "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]);
+	printk(KERN_DEBUG
+	       "bundle template 0x%x\n",
+	       bundle->quad0.template);
+	printk(KERN_DEBUG
+	       "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
+	       (unsigned long)bundle->quad0.slot0,
+	       (unsigned long)bundle->quad0.slot1_p0,
+	       (unsigned long)bundle->quad1.slot1_p1,
+	       (unsigned long)bundle->quad1.slot2);
+	printk(KERN_DEBUG
+	       "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
+	       slot0.l, slot1.l, slot2.l);
+}
+
+static int noreplace_paravirt __initdata_or_module = 0;
+
+static int __init setup_noreplace_paravirt(char *str)
+{
+	noreplace_paravirt = 1;
+	return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+
+#ifdef ASM_SUPPORTED
+static void __init_or_module
+fill_nop_bundle(void *sbundle, void *ebundle)
+{
+	extern const char paravirt_nop_bundle[];
+	extern const unsigned long paravirt_nop_bundle_size;
+
+	void *bundle = sbundle;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	while (bundle < ebundle) {
+		memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size);
+
+		bundle += paravirt_nop_bundle_size;
+	}
+}
+
+/* helper function */
+unsigned long __init_or_module
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found)
+{
+	unsigned long used = 0;
+	unsigned long i;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	if (found != NULL)
+		*found = NULL;
+	for (i = 0; i < nelems; i++) {
+		const struct paravirt_patch_bundle_elem *p = &elems[i];
+		if (p->type == type) {
+			unsigned long need = p->ebundle - p->sbundle;
+			unsigned long room = ebundle - sbundle;
+
+			if (found != NULL)
+				*found = p;
+
+			if (room < need) {
+				/* no room to replace; skip it */
+				printk(KERN_DEBUG
+				       "not enough room to patch bundles: "
+				       "type %ld needs %ld, room %ld\n",
+				       type, need, room);
+				break;
+			}
+
+			used = need;
+			memcpy(sbundle, p->sbundle, used);
+			break;
+		}
+	}
+
+	return used;
+}
+
+void __init_or_module
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end)
+{
+	const struct paravirt_patch_site_bundle *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_bundle == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long used;
+
+		used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle,
+						   p->type);
+		if (used == 0)
+			continue;
+
+		fill_nop_bundle(p->sbundle + used, p->ebundle);
+		paravirt_flush_i_cache_range(p->sbundle,
+					     p->ebundle - p->sbundle);
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * nop.i, nop.m and nop.f instructions share the same format,
+ * but nop.b has a different one. nop.b is not supported for now.
+ */
+static void __init_or_module
+fill_nop_inst(unsigned long stag, unsigned long etag)
+{
+	extern const bundle_t paravirt_nop_mfi_inst_bundle[];
+	unsigned long tag;
+	const ia64_inst_t nop_inst =
+		paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
+
+	for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag))
+		paravirt_write_inst(tag, nop_inst);
+}
+
+void __init_or_module
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end)
+{
+	const struct paravirt_patch_site_inst *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_inst == NULL)
+		return;
+
+	for (p = start; p < end; p++) {
+		unsigned long tag;
+		bundle_t *sbundle;
+		bundle_t *ebundle;
+
+		tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type);
+		if (tag == p->stag)
+			continue;
+
+		fill_nop_inst(tag, p->etag);
+		sbundle = paravirt_get_bundle(p->stag);
+		ebundle = paravirt_get_bundle(p->etag) + 1;
+		paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) *
+					     sizeof(bundle_t));
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+#endif /* ASM_SUPPORTED */
+
+/* brl.cond.sptk.many <target64> X3 */
+typedef union inst_x3_op {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btyp: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long i: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_x3_op_t;
+
+typedef union inst_x3_imm {
+	ia64_inst_t inst;
+	struct {
+		unsigned long unused: 2;
+		unsigned long imm39: 39;
+	};
+	unsigned long l;
+} inst_x3_imm_t;
+
+void __init_or_module
+paravirt_patch_reloc_brl(unsigned long tag, const void *target)
+{
+	unsigned long tag_op = paravirt_get_next_tag(tag);
+	unsigned long tag_imm = tag;
+	bundle_t *bundle = paravirt_get_bundle(tag);
+
+	ia64_inst_t inst_op = paravirt_read_inst(tag_op);
+	ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
+
+	inst_x3_op_t inst_x3_op = { .l = inst_op.l };
+	inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
+
+	unsigned long imm60 =
+		((unsigned long)target - (unsigned long)bundle) >> 4;
+
+	BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	/* imm60[59] 1bit */
+	inst_x3_op.i = (imm60 >> 59) & 1;
+	/* imm60[19:0] 20bit */
+	inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
+	/* imm60[58:20] 39bit */
+	inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
+
+	inst_op.l = inst_x3_op.l;
+	inst_imm.l = inst_x3_imm.l;
+
+	paravirt_write_inst(tag_op, inst_op);
+	paravirt_write_inst(tag_imm, inst_imm);
+}
+
+/* br.cond.sptk.many <target25>	B1 */
+typedef union inst_b1 {
+	ia64_inst_t inst;
+	struct {
+		unsigned long qp: 6;
+		unsigned long btype: 3;
+		unsigned long unused: 3;
+		unsigned long p: 1;
+		unsigned long imm20b: 20;
+		unsigned long wh: 2;
+		unsigned long d: 1;
+		unsigned long s: 1;
+		unsigned long opcode: 4;
+	};
+	unsigned long l;
+} inst_b1_t;
+
+void __init
+paravirt_patch_reloc_br(unsigned long tag, const void *target)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	ia64_inst_t inst = paravirt_read_inst(tag);
+	unsigned long target25 = (unsigned long)target - (unsigned long)bundle;
+	inst_b1_t inst_b1;
+
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	inst_b1.l = inst.l;
+	if (target25 & (1UL << 63))
+		inst_b1.s = 1;
+	else
+		inst_b1.s = 0;
+
+	inst_b1.imm20b = target25 >> 4;
+	inst.l = inst_b1.l;
+
+	paravirt_write_inst(tag, inst);
+}
+
+void __init
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_entries; i++) {
+		if (entries[i].type == type) {
+			paravirt_patch_reloc_br(tag, entries[i].entry);
+			break;
+		}
+	}
+}
+
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+			    const struct paravirt_patch_site_branch *end)
+{
+	const struct paravirt_patch_site_branch *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_branch == NULL)
+		return;
+
+	for (p = start; p < end; p++)
+		(*pv_init_ops.patch_branch)(p->tag, p->type);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+paravirt_patch_apply(void)
+{
+	extern const char __start_paravirt_bundles[];
+	extern const char __stop_paravirt_bundles[];
+	extern const char __start_paravirt_insts[];
+	extern const char __stop_paravirt_insts[];
+	extern const char __start_paravirt_branches[];
+	extern const char __stop_paravirt_branches[];
+
+	paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *)
+				    __start_paravirt_bundles,
+				    (const struct paravirt_patch_site_bundle *)
+				    __stop_paravirt_bundles);
+	paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *)
+				  __start_paravirt_insts,
+				  (const struct paravirt_patch_site_inst *)
+				  __stop_paravirt_insts);
+	paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *)
+				    __start_paravirt_branches,
+				    (const struct paravirt_patch_site_branch *)
+				    __stop_paravirt_branches);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
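Annotation: the tag encoding used throughout this file exploits the fact that bundles are 16 bytes, so a bundle address always has its low bits clear: a tag is the bundle address with the slot index (0-2) stored in the low two bits. A short sketch of the helpers above:

/* sketch: decompose a patch-site tag */
static void example_decompose(unsigned long tag)
{
	bundle_t *bundle = paravirt_get_bundle(tag);	/* tag & ~3UL */
	unsigned long slot = paravirt_get_slot(tag);	/* tag & 3UL  */

	(void)bundle;
	(void)slot;
}

/*
 * With three slots per bundle, the instruction count between two tags
 * (inclusive) is what paravirt_get_num_inst() computes:
 *	(ebundle - sbundle) * 3 + eslot - sslot + 1
 */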
diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c
new file mode 100644
index 0000000..b28082a
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@@ -0,0 +1,79 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/bug.h>
+#include <asm/paravirt.h>
+
+#define DECLARE(name)						\
+	extern unsigned long					\
+		__ia64_native_start_gate_##name##_patchlist[];	\
+	extern unsigned long					\
+		__ia64_native_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __start_gate_section[];
+
+#define ASSIGN(name)							    \
+	.start_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_start_gate_##name##_patchlist, \
+	.end_##name##_patchlist =					    \
+		(unsigned long)__ia64_native_end_gate_##name##_patchlist
+
+struct pv_patchdata pv_patchdata __initdata = {
+	ASSIGN(fsyscall),
+	ASSIGN(brl_fsys_bubble_down),
+	ASSIGN(vtop),
+	ASSIGN(mckinley_e9),
+
+	.gate_section = (void*)__start_gate_section,
+};
+
+unsigned long __init
+paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
+{
+#define CASE(NAME, name)					\
+	case PV_GATE_START_##NAME:				\
+		return pv_patchdata.start_##name##_patchlist;	\
+	case PV_GATE_END_##NAME:				\
+		return pv_patchdata.end_##name##_patchlist;
+
+	switch (type) {
+		CASE(FSYSCALL, fsyscall);
+		CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
+		CASE(VTOP, vtop);
+		CASE(MCKINLEY_E9, mckinley_e9);
+	default:
+		BUG();
+		break;
+	}
+	return 0;
+}
+
+void * __init
+paravirt_get_gate_section(void)
+{
+	return pv_patchdata.gate_section;
+}
diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h
new file mode 100644
index 0000000..0684aa6
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@@ -0,0 +1,28 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirt_patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#if defined(__IA64_GATE_PARAVIRTUALIZED_XEN)
+#include <asm/xen/patchlist.h>
+#else
+#include <asm/native/patchlist.h>
+#endif
diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S
index 2f42fcb..6158560 100644
--- a/arch/ia64/kernel/paravirtentry.S
+++ b/arch/ia64/kernel/paravirtentry.S
@@ -20,8 +20,11 @@
  *
  */
 
+#include <linux/init.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-offsets.h>
+#include <asm/paravirt_privop.h>
+#include <asm/paravirt_patch.h>
 #include "entry.h"
 
 #define DATA8(sym, init_value)			\
@@ -32,29 +35,87 @@
 	data8 init_value ;			\
 	.popsection
 
-#define BRANCH(targ, reg, breg)		\
-	movl reg=targ ;			\
-	;;				\
-	ld8 reg=[reg] ;			\
-	;;				\
-	mov breg=reg ;			\
+#define BRANCH(targ, reg, breg, type)					\
+	PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ;	\
+	;;								\
+	movl reg=targ ;							\
+	;;								\
+	ld8 reg=[reg] ;							\
+	;;								\
+	mov breg=reg ;							\
 	br.cond.sptk.many breg
 
-#define BRANCH_PROC(sym, reg, breg)				\
-	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
-	GLOBAL_ENTRY(paravirt_ ## sym) ;			\
-		BRANCH(paravirt_ ## sym ## _targ, reg, breg) ;	\
+#define BRANCH_PROC(sym, reg, breg, type)				\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
 	END(paravirt_ ## sym)
 
-#define BRANCH_PROC_UNWINFO(sym, reg, breg)			\
-	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
-	GLOBAL_ENTRY(paravirt_ ## sym) ;			\
-		PT_REGS_UNWIND_INFO(0) ;			\
-		BRANCH(paravirt_ ## sym ## _targ, reg, breg) ;	\
+#define BRANCH_PROC_UNWINFO(sym, reg, breg, type)			\
+	DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ;		\
+	GLOBAL_ENTRY(paravirt_ ## sym) ;				\
+		PT_REGS_UNWIND_INFO(0) ;				\
+		BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ;	\
 	END(paravirt_ ## sym)
 
 
-BRANCH_PROC(switch_to, r22, b7)
-BRANCH_PROC_UNWINFO(leave_syscall, r22, b7)
-BRANCH_PROC(work_processed_syscall, r2, b7)
-BRANCH_PROC_UNWINFO(leave_kernel, r22, b7)
+BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
+BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
+
+
+#ifdef CONFIG_MODULES
+#define __INIT_OR_MODULE	.text
+#define __INITDATA_OR_MODULE	.data
+#else
+#define __INIT_OR_MODULE	__INIT
+#define __INITDATA_OR_MODULE	__INITDATA
+#endif /* CONFIG_MODULES */
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_fc_i)
+	fc.i r32
+	br.ret.sptk.many rp
+	END(paravirt_fc_i)
+	__FINIT
+
+	__INIT_OR_MODULE
+	.align 32
+	GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
+	{
+		nop.b 0
+		nop.b 0
+		nop.b 0
+	}
+	END(paravirt_nop_b_inst_bundle)
+	__FINIT
+
+	/* NOTE: nop.[mfi] instructions share the same format */
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
+	{
+		nop.m 0
+		nop.f 0
+		nop.i 0
+	}
+	END(paravirt_nop_mfi_inst_bundle)
+	__FINIT
+
+	__INIT_OR_MODULE
+	GLOBAL_ENTRY(paravirt_nop_bundle)
+paravirt_nop_bundle_start:
+	{
+		nop 0
+		nop 0
+		nop 0
+	}
+paravirt_nop_bundle_end:
+	END(paravirt_nop_bundle)
+	__FINIT
+
+	__INITDATA_OR_MODULE
+	.align 8
+	.global paravirt_nop_bundle_size
+paravirt_nop_bundle_size:
+	data8	paravirt_nop_bundle_end - paravirt_nop_bundle_start
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index b83b2c5..68a1311 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 
+#include <asm/paravirt.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -169,16 +170,35 @@
 	ia64_srlz_i();
 }
 
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+struct pv_fsys_data pv_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+	.fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+	return pv_fsys_data.fsyscall_table;
+}
+
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+	return pv_fsys_data.fsys_bubble_down;
+}
+
 static void __init
 patch_fsyscall_table (unsigned long start, unsigned long end)
 {
-	extern unsigned long fsyscall_table[NR_syscalls];
+	u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
 	s32 *offp = (s32 *) start;
 	u64 ip;
 
 	while (offp < (s32 *) end) {
 		ip = (u64) ia64_imva((char *) offp + *offp);
-		ia64_patch_imm64(ip, (u64) fsyscall_table);
+		ia64_patch_imm64(ip, fsyscall_table);
 		ia64_fc((void *) ip);
 		++offp;
 	}
@@ -189,7 +209,7 @@
 static void __init
 patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 {
-	extern char fsys_bubble_down[];
+	u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
 	s32 *offp = (s32 *) start;
 	u64 ip;
 
@@ -207,13 +227,13 @@
 void __init
 ia64_patch_gate (void)
 {
-#	define START(name)	((unsigned long) __start_gate_##name##_patchlist)
-#	define END(name)	((unsigned long)__end_gate_##name##_patchlist)
+#	define START(name)	paravirt_get_gate_patchlist(PV_GATE_START_##name)
+#	define END(name)	paravirt_get_gate_patchlist(PV_GATE_END_##name)
 
-	patch_fsyscall_table(START(fsyscall), END(fsyscall));
-	patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
-	ia64_patch_vtop(START(vtop), END(vtop));
-	ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
+	patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL));
+	patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN));
+	ia64_patch_vtop(START(VTOP), END(VTOP));
+	ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9));
 }
 
 void ia64_patch_phys_stack_reg(unsigned long val)
@@ -229,7 +249,7 @@
 	while (offp < end) {
 		ip = (u64) offp + *offp;
 		ia64_patch(ip, mask, imm);
-		ia64_fc(ip);
+		ia64_fc((void *)ip);
 		++offp;
 	}
 	ia64_sync_i();
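Annotation: the net effect of the patch.c changes is one level of indirection: ia64_patch_gate() now fetches the fsyscall table and the bubble-down entry through pv_fsys_data instead of hard-wiring the native symbols, so a paravirtualized platform can substitute its own before the gate page is patched. A hypothetical override (the xen_* names are illustrative, not taken from this diff):

/* sketch only: install alternate fsys data before ia64_patch_gate() */
void __init xen_setup_fsys_data(void)
{
	pv_fsys_data.fsyscall_table = (unsigned long *)xen_fsyscall_table;
	pv_fsys_data.fsys_bubble_down = (void *)xen_fsys_bubble_down;
}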
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5c0f408..8a06dc48 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -5603,7 +5603,7 @@
  * /proc/perfmon interface, for debug only
  */
 
-#define PFM_PROC_SHOW_HEADER	((void *)NR_CPUS+1)
+#define PFM_PROC_SHOW_HEADER	((void *)nr_cpu_ids+1)
 
 static void *
 pfm_proc_start(struct seq_file *m, loff_t *pos)
@@ -5612,7 +5612,7 @@
 		return PFM_PROC_SHOW_HEADER;
 	}
 
-	while (*pos <= NR_CPUS) {
+	while (*pos <= nr_cpu_ids) {
 		if (cpu_online(*pos - 1)) {
 			return (void *)*pos;
 		}
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index ecb9eb7..7053c55 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -317,7 +317,7 @@
 	}
 
 	n = data->cpu_check;
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (cpu_isset(n, data->cpu_event)) {
 			if (!cpu_online(n)) {
 				cpu_clear(n, data->cpu_event);
@@ -326,7 +326,7 @@
 			cpu = n;
 			break;
 		}
-		if (++n == NR_CPUS)
+		if (++n == nr_cpu_ids)
 			n = 0;
 	}
 
@@ -337,7 +337,7 @@
 
 	/* for next read, start checking at next CPU */
 	data->cpu_check = cpu;
-	if (++data->cpu_check == NR_CPUS)
+	if (++data->cpu_check == nr_cpu_ids)
 		data->cpu_check = 0;
 
 	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 865af27..714066a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -52,6 +52,7 @@
 #include <asm/meminit.h>
 #include <asm/page.h>
 #include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
 #include <asm/patch.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -537,6 +538,7 @@
 	paravirt_arch_setup_early();
 
 	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+	paravirt_patch_apply();
 
 	*cmdline_p = __va(ia64_boot_param->command_line);
 	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
@@ -730,10 +732,10 @@
 c_start (struct seq_file *m, loff_t *pos)
 {
 #ifdef CONFIG_SMP
-	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+	while (*pos < nr_cpu_ids && !cpu_online(*pos))
 		++*pos;
 #endif
-	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
+	return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL;
 }
 
 static void *
@@ -1016,8 +1018,7 @@
 					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
-	if (current->mm)
-		BUG();
+	BUG_ON(current->mm);
 
 	ia64_mmu_init(ia64_imva(cpu_data));
 	ia64_mca_cpu_init(ia64_imva(cpu_data));
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index da8f020..2ea4199 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -166,11 +166,11 @@
  * Called with preemption disabled.
  */
 static inline void
-send_IPI_mask(cpumask_t mask, int op)
+send_IPI_mask(const struct cpumask *mask, int op)
 {
 	unsigned int cpu;
 
-	for_each_cpu_mask(cpu, mask) {
+	for_each_cpu(cpu, mask) {
 			send_IPI_single(cpu, op);
 	}
 }
@@ -316,7 +316,7 @@
 	send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
 }
 
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	send_IPI_mask(mask, IPI_CALL_FUNC);
 }
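Annotation: the signature change above follows the generic cpumask rework: passing const struct cpumask * instead of cpumask_t by value avoids copying an NR_CPUS-bit mask onto the stack on large configurations. Call sites hand over pointers they already have, for example (sketch):

send_IPI_mask(mm_cpumask(mm), IPI_CALL_FUNC);
send_IPI_mask(cpumask_of(cpu), IPI_CALL_FUNC);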
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 5229054..7700e23 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -581,14 +581,14 @@
 
 	ia64_cpu_to_sapicid[0] = boot_cpu_id;
 	cpus_clear(cpu_present_map);
-	cpu_set(0, cpu_present_map);
-	cpu_set(0, cpu_possible_map);
+	set_cpu_present(0, true);
+	set_cpu_possible(0, true);
 	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
 		sapicid = smp_boot_data.cpu_phys_id[i];
 		if (sapicid == boot_cpu_id)
 			continue;
-		cpu_set(cpu, cpu_present_map);
-		cpu_set(cpu, cpu_possible_map);
+		set_cpu_present(cpu, true);
+		set_cpu_possible(cpu, true);
 		ia64_cpu_to_sapicid[cpu] = sapicid;
 		cpu++;
 	}
@@ -626,12 +626,9 @@
 	 */
 	if (!max_cpus) {
 		printk(KERN_INFO "SMP mode deactivated.\n");
-		cpus_clear(cpu_online_map);
-		cpus_clear(cpu_present_map);
-		cpus_clear(cpu_possible_map);
-		cpu_set(0, cpu_online_map);
-		cpu_set(0, cpu_present_map);
-		cpu_set(0, cpu_possible_map);
+		init_cpu_online(cpumask_of(0));
+		init_cpu_present(cpumask_of(0));
+		init_cpu_possible(cpumask_of(0));
 		return;
 	}
 }
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index d6747ba..641c8b6 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -51,6 +51,15 @@
 #endif
 
 #ifdef CONFIG_PARAVIRT
+/*
+ * We need to define a real function for sched_clock, to override the
+ * weak default version.
+ */
+unsigned long long sched_clock(void)
+{
+	return paravirt_sched_clock();
+}
+
 static void
 paravirt_clocksource_resume(void)
 {
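Annotation: this pairs with the .sched_clock member added to pv_time_ops in paravirt.c above; paravirt_sched_clock() presumably dereferences that hook, so the strong sched_clock() defined here routes through whichever implementation the platform installed at boot. A hypothetical Xen-side override (xen_sched_clock and xen_run_time_ns are illustrative names, not from this diff):

/* sketch only */
static unsigned long long xen_sched_clock(void)
{
	return xen_run_time_ns();	/* hypothetical stolen-time-aware clock */
}

void __init xen_time_init(void)
{
	pv_time_ops.sched_clock = xen_sched_clock;
}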
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 3765efc..4a95e86 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -169,6 +169,30 @@
 	  __end___mckinley_e9_bundles = .;
 	}
 
+#if defined(CONFIG_PARAVIRT)
+  . = ALIGN(16);
+  .paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET)
+	{
+	  __start_paravirt_bundles = .;
+          *(.paravirt_bundles)
+	  __stop_paravirt_bundles = .;
+	}
+  . = ALIGN(16);
+  .paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET)
+	{
+	  __start_paravirt_insts = .;
+          *(.paravirt_insts)
+	  __stop_paravirt_insts = .;
+	}
+  . = ALIGN(16);
+  .paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET)
+	{
+	  __start_paravirt_branches = .;
+	  *(.paravirt_branches)
+	  __stop_paravirt_branches = .;
+	}
+#endif
+
 #if defined(CONFIG_IA64_GENERIC)
   /* Machine Vector */
   . = ALIGN(16);
@@ -201,6 +225,12 @@
 	  __start_gate_section = .;
 	  *(.data.gate)
 	  __stop_gate_section = .;
+#ifdef CONFIG_XEN
+	  . = ALIGN(PAGE_SIZE);
+	  __xen_start_gate_section = .;
+	  *(.data.gate.xen)
+	  __xen_stop_gate_section = .;
+#endif
 	}
   . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose
   				 * kernel data
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 076b00d..28af6a7 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -70,7 +70,7 @@
 	int l;
 
 	for (l = 0; l < (len + 32); l += 32)
-		ia64_fc(start + l);
+		ia64_fc((void *)(start + l));
 
 	ia64_sync_i();
 	ia64_srlz_i();
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index d4d2805..a18ee17 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -386,7 +386,7 @@
 		else
 			*rnat_addr = (*rnat_addr) & (~nat_mask);
 
-		ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
+		ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
 		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
 	}
 	local_irq_restore(psr);
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 38232b3..2c2501f 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -210,6 +210,7 @@
 		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
 		psr = ia64_clear_ic();
 		ia64_itc(type, va, phy_pte, itir_ps(itir));
+		paravirt_dv_serialize_data();
 		ia64_set_psr(psr);
 	}
 
@@ -456,6 +457,7 @@
 		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
 		psr = ia64_clear_ic();
 		ia64_itc(type, ifa, phy_pte, ps);
+		paravirt_dv_serialize_data();
 		ia64_set_psr(psr);
 	}
 	if (!(pte&VTLB_PTE_IO))
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 56e1290..c0f3bee 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -35,6 +35,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>
+#include <asm/paravirt.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
@@ -259,6 +260,7 @@
 static void __init
 setup_gate (void)
 {
+	void *gate_section;
 	struct page *page;
 
 	/*
@@ -266,10 +268,11 @@
 	 * headers etc. and once execute-only page to enable
 	 * privilege-promotion via "epc":
 	 */
-	page = virt_to_page(ia64_imva(__start_gate_section));
+	gate_section = paravirt_get_gate_section();
+	page = virt_to_page(ia64_imva(gate_section));
 	put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
 #ifdef HAVE_BUGGY_SEGREL
-	page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
+	page = virt_to_page(ia64_imva(gate_section + PAGE_SIZE));
 	put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
 #else
 	put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
@@ -633,8 +636,7 @@
 #endif
 
 #ifdef CONFIG_FLATMEM
-	if (!mem_map)
-		BUG();
+	BUG_ON(!mem_map);
 	max_mapnr = max_low_pfn;
 #endif
 
@@ -667,8 +669,8 @@
 	 * code can tell them apart.
 	 */
 	for (i = 0; i < NR_syscalls; ++i) {
-		extern unsigned long fsyscall_table[NR_syscalls];
 		extern unsigned long sys_call_table[NR_syscalls];
+		unsigned long *fsyscall_table = paravirt_get_fsyscall_table();
 
 		if (!fsyscall_table[i] || nolwsys)
 			fsyscall_table[i] = sys_call_table[i] | 1;
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index bd9818a..b9f3d7b 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -309,7 +309,7 @@
 
 	preempt_disable();
 #ifdef CONFIG_SMP
-	if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+	if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
 		platform_global_tlb_purge(mm, start, end, nbits);
 		preempt_enable();
 		return;
diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed
index ba66ac2..e59809a 100644
--- a/arch/ia64/scripts/pvcheck.sed
+++ b/arch/ia64/scripts/pvcheck.sed
@@ -17,6 +17,7 @@
 s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g
 s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g	# avoid ar.fpsr
 s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g
+s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g
 s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g
 s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g
 s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 0d4ffa4..57f280d 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -135,8 +135,7 @@
 	}
 
 	war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
-	if (!war_list)
-		BUG();
+	BUG_ON(!war_list);
 
 	SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
 			nasid, widget, __pa(war_list), 0, 0, 0 ,0);
@@ -180,23 +179,20 @@
 		sizeof(struct sn_flush_device_kernel *);
 	hubdev->hdi_flush_nasid_list.widget_p =
 		kzalloc(size, GFP_KERNEL);
-	if (!hubdev->hdi_flush_nasid_list.widget_p)
-		BUG();
+	BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p);
 
 	for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
 		size = DEV_PER_WIDGET *
 			sizeof(struct sn_flush_device_kernel);
 		sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
-		if (!sn_flush_device_kernel)
-			BUG();
+		BUG_ON(!sn_flush_device_kernel);
 
 		dev_entry = sn_flush_device_kernel;
 		for (device = 0; device < DEV_PER_WIDGET;
 		     device++, dev_entry++) {
 			size = sizeof(struct sn_flush_device_common);
 			dev_entry->common = kzalloc(size, GFP_KERNEL);
-			if (!dev_entry->common)
-				BUG();
+			BUG_ON(!dev_entry->common);
 			if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST))
 				status = sal_get_device_dmaflush_list(
 					     hubdev->hdi_nasid, widget, device,
@@ -326,8 +322,7 @@
 	 */
 	controller->platform_data = kzalloc(sizeof(struct sn_platform_data),
 					    GFP_KERNEL);
-	if (controller->platform_data == NULL)
-		BUG();
+	BUG_ON(controller->platform_data == NULL);
 	sn_platform_data =
 			(struct sn_platform_data *) controller->platform_data;
 	sn_platform_data->provider_soft = provider_soft;
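Annotation: the repeated conversion in this file (and in module.c, setup.c and mm/init.c above) is purely mechanical; BUG_ON(cond) is the canonical spelling of the two-line form and keeps the condition visible on one line:

/* before */
if (!war_list)
	BUG();

/* after */
BUG_ON(!war_list);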
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index e2eb2da..ee774c3 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -128,8 +128,7 @@
 {
 		controller->window = kcalloc(2, sizeof(struct pci_window),
 					     GFP_KERNEL);
-		if (controller->window == NULL)
-			BUG();
+		BUG_ON(controller->window == NULL);
 		controller->window[0].offset = legacy_io;
 		controller->window[0].resource.name = "legacy_io";
 		controller->window[0].resource.flags = IORESOURCE_IO;
@@ -168,8 +167,7 @@
 	idx = controller->windows;
 	new_count = controller->windows + count;
 	new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
-	if (new_window == NULL)
-		BUG();
+	BUG_ON(new_window == NULL);
 	if (controller->window) {
 		memcpy(new_window, controller->window,
 		       sizeof(struct pci_window) * controller->windows);
@@ -222,8 +220,7 @@
 		(u64) __pa(pcidev_info),
 		(u64) __pa(sn_irq_info));
 
-	if (status)
-		BUG(); /* Cannot get platform pci device information */
+	BUG_ON(status); /* Cannot get platform pci device information */
 
 
 	/* Copy over PIO Mapped Addresses */
@@ -307,8 +304,7 @@
 	prom_bussoft_ptr = __va(prom_bussoft_ptr);
 
 	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
-	if (!controller)
-		BUG();
+	BUG_ON(!controller);
 	controller->segment = segment;
 
 	/*
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 02c5b8a..e456f06 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -732,8 +732,7 @@
 		kl_config_hdr_t *klgraph_header;
 		nasid = cnodeid_to_nasid(node);
 		klgraph_header = ia64_sn_get_klconfig_addr(nasid);
-		if (klgraph_header == NULL)
-			BUG();
+		BUG_ON(klgraph_header == NULL);
 		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
 		while (brd) {
 			if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
@@ -750,7 +749,7 @@
 {
 	long cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		if (cpuid_to_nasid(cpu) == nasid &&
 					cpuid_to_slice(cpu) == slice)
 			return cpu;
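
nr_cpu_ids is the runtime upper bound on possible CPU ids, so bounding the
scan by it instead of the compile-time NR_CPUS skips ids that can never come
online. A roughly equivalent form using the possible-cpu iterator (function
name invented; cpuid_to_nasid()/cpuid_to_slice() as in the hunk above):

#include <linux/cpumask.h>

static long sketch_nasid_slice_to_cpuid(int nasid, int slice)
{
	long cpu;

	for_each_possible_cpu(cpu)
		if (cpuid_to_nasid(cpu) == nasid &&
		    cpuid_to_slice(cpu) == slice)
			return cpu;
	return -1;
}
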
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index e585f9a..1176506 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -133,7 +133,7 @@
 	unsigned long itc;
 
 	itc = ia64_get_itc();
-	smp_flush_tlb_cpumask(mm->cpu_vm_mask);
+	smp_flush_tlb_cpumask(*mm_cpumask(mm));
 	itc = ia64_get_itc() - itc;
 	__get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc;
 	__get_cpu_var(ptcstats).shub_ipi_flushes++;
@@ -182,7 +182,7 @@
 	nodes_clear(nodes_flushed);
 	i = 0;
 
-	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+	for_each_cpu(cpu, mm_cpumask(mm)) {
 		cnode = cpu_to_node(cpu);
 		node_set(cnode, nodes_flushed);
 		lcpu = cpu;
@@ -461,7 +461,7 @@
 
 static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
 {
-	if (*offset < NR_CPUS)
+	if (*offset < nr_cpu_ids)
 		return offset;
 	return NULL;
 }
@@ -469,7 +469,7 @@
 static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
 {
 	(*offset)++;
-	if (*offset < NR_CPUS)
+	if (*offset < nr_cpu_ids)
 		return offset;
 	return NULL;
 }
@@ -491,7 +491,7 @@
 		seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt);
 	}
 
-	if (cpu < NR_CPUS && cpu_online(cpu)) {
+	if (cpu < nr_cpu_ids && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
 		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
 				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
@@ -554,7 +554,7 @@
 
 	proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
 				   NULL, &proc_sn2_ptc_operations);
-	if (!&proc_sn2_ptc_operations) {
+	if (!proc_sn2_ptc) {
 		printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
 		return -EINVAL;
 	}
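
The last hunk is a genuine bug fix, not a cleanup: &proc_sn2_ptc_operations
is the address of a static object and can never be NULL, so the old test was
dead code and a failed proc_create() went undetected. What must be checked is
the returned entry; a sketch of the correct pattern (entry name and helper
are illustrative, not the driver's):

#include <linux/errno.h>
#include <linux/proc_fs.h>

static int __init sketch_register_proc(const struct file_operations *fops)
{
	struct proc_dir_entry *pde;

	pde = proc_create("example_entry", 0444, NULL, fops);
	if (!pde)		/* test the return value, not &fops */
		return -ENOMEM;
	return 0;
}
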
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index be33947..9e6491c 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -275,8 +275,7 @@
 
 	/* get it's interconnect topology */
 	sz = op->ports * sizeof(struct sn_hwperf_port_info);
-	if (sz > sizeof(ptdata))
-		BUG();
+	BUG_ON(sz > sizeof(ptdata));
 	e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 			      SN_HWPERF_ENUM_PORTS, nodeobj->id, sz,
 			      (u64)&ptdata, 0, 0, NULL);
@@ -310,8 +309,7 @@
 	if (router && (!found_cpu || !found_mem)) {
 		/* search for a node connected to the same router */
 		sz = router->ports * sizeof(struct sn_hwperf_port_info);
-		if (sz > sizeof(ptdata))
-			BUG();
+		BUG_ON(sz > sizeof(ptdata));
 		e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 				      SN_HWPERF_ENUM_PORTS, router->id, sz,
 				      (u64)&ptdata, 0, 0, NULL);
@@ -612,7 +610,7 @@
 	op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
 
 	if (cpu != SN_HWPERF_ARG_ANY_CPU) {
-		if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+		if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 			r = -EINVAL;
 			goto out;
 		}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 060df4a..c659ad5 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -256,9 +256,7 @@
 
 	hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo;
 
-	if (!hubinfo) {
-		BUG();
-	}
+	BUG_ON(!hubinfo);
 
 	flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
 	if (flush_nasid_list->widget_p == NULL)
diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile
index 0ad0224..e6f4a0a 100644
--- a/arch/ia64/xen/Makefile
+++ b/arch/ia64/xen/Makefile
@@ -3,14 +3,29 @@
 #
 
 obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \
-	 hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o
+	 hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o \
+	 gate-data.o
 
 obj-$(CONFIG_IA64_GENERIC) += machvec.o
 
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+
+# tell the preprocessor/assembler that the gate page is built for Xen
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_XEN
+AFLAGS_gate.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN -D__IA64_GATE_PARAVIRTUALIZED_XEN
+
+# reuse the same source files as the native build.
+$(obj)/gate.o: $(src)/../kernel/gate.S FORCE
+	$(call if_changed_dep,as_o_S)
+$(obj)/gate.lds: $(src)/../kernel/gate.lds.S FORCE
+	$(call if_changed_dep,cpp_lds_S)
+
+
 AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN
 
 # xen multi compile
-ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S
+ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S fsys.S
 ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o))
 obj-y += $(ASM_PARAVIRT_OBJS)
 define paravirtualized_xen
diff --git a/arch/ia64/xen/gate-data.S b/arch/ia64/xen/gate-data.S
new file mode 100644
index 0000000..7d4830a
--- /dev/null
+++ b/arch/ia64/xen/gate-data.S
@@ -0,0 +1,3 @@
+	.section .data.gate.xen, "aw"
+
+	.incbin "arch/ia64/xen/gate.so"
diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S
index 45e02bb..e32dae4 100644
--- a/arch/ia64/xen/hypercall.S
+++ b/arch/ia64/xen/hypercall.S
@@ -9,6 +9,7 @@
 #include <asm/intrinsics.h>
 #include <asm/xen/privop.h>
 
+#ifdef __INTEL_COMPILER
 /*
  * Hypercalls without parameter.
  */
@@ -72,6 +73,7 @@
 	br.ret.sptk.many rp
 	;;
 END(xen_set_rr0_to_rr4)
+#endif
 
 GLOBAL_ENTRY(xen_send_ipi)
 	mov r14=r32
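
These assembly hypercall wrappers are kept only for the Intel compiler: when
the compiler supports inline assembly (ASM_SUPPORTED, i.e. not ICC), the same
entry points are generated as inline-asm stubs in xen_pv_ops.c later in this
series, e.g.:

extern void
xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
		   unsigned long val2, unsigned long val3,
		   unsigned long val4);
__DEFINE_FUNC(set_rr0_to_rr4,
	      "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n");
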
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index 68d6204..fb83326 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -175,10 +175,58 @@
 	} while (unlikely(ret != lcycle));
 }
 
+/* based on xen_sched_clock() in arch/x86/xen/time.c. */
+/*
+ * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If that cannot be defined,
+ * similar logic should be implemented here.
+ */
+/*
+ * Xen sched_clock implementation.  Returns the number of unstolen
+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
+ * states.
+ */
+static unsigned long long xen_sched_clock(void)
+{
+	struct vcpu_runstate_info runstate;
+
+	unsigned long long now;
+	unsigned long long offset;
+	unsigned long long ret;
+
+	/*
+	 * Ideally sched_clock should be called on a per-cpu basis
+	 * anyway, so preempt should already be disabled, but that's
+	 * not current practice at the moment.
+	 */
+	preempt_disable();
+
+	/*
+	 * Both ia64_native_sched_clock() and Xen's runstate are
+	 * based on mAR.ITC, so taking their difference is meaningful.
+	 */
+	now = ia64_native_sched_clock();
+
+	get_runstate_snapshot(&runstate);
+
+	WARN_ON(runstate.state != RUNSTATE_running);
+
+	offset = 0;
+	if (now > runstate.state_entry_time)
+		offset = now - runstate.state_entry_time;
+	ret = runstate.time[RUNSTATE_blocked] +
+		runstate.time[RUNSTATE_running] +
+		offset;
+
+	preempt_enable();
+
+	return ret;
+}
+
 struct pv_time_ops xen_time_ops __initdata = {
 	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
 	.do_steal_accounting		= xen_do_steal_accounting,
 	.clocksource_resume		= xen_itc_jitter_data_reset,
+	.sched_clock			= xen_sched_clock,
 };
 
 /* Called after suspend, to resume time.  */
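
A quick arithmetic check of xen_sched_clock() with invented numbers: if the
VCPU has accumulated 70ns RUNNING and 20ns BLOCKED, re-entered RUNNING at
t=95ns, and ia64_native_sched_clock() now reads 100ns, the result is 95ns;
time stolen by the hypervisor (runnable/offline) never enters the sum:

/* sketch with made-up values, mirroring xen_sched_clock() above */
static unsigned long long sketch_unstolen_ns(void)
{
	unsigned long long running = 70, blocked = 20;	/* runstate.time[] */
	unsigned long long entry = 95, now = 100;	/* entry time, now */
	unsigned long long offset = now > entry ? now - entry : 0;	/* 5 */

	return running + blocked + offset;		/* 95 */
}
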
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
index 936cff3..5e2270a 100644
--- a/arch/ia64/xen/xen_pv_ops.c
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -24,6 +24,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/pm.h>
+#include <linux/unistd.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/xencomm.h>
@@ -153,6 +154,13 @@
 	xen_setup_vcpu_info_placement();
 }
 
+#ifdef ASM_SUPPORTED
+static unsigned long __init_or_module
+xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+#endif
+static void __init
+xen_patch_branch(unsigned long tag, unsigned long type);
+
 static const struct pv_init_ops xen_init_ops __initconst = {
 	.banner = xen_banner,
 
@@ -163,6 +171,53 @@
 	.arch_setup_nomca = xen_arch_setup_nomca,
 
 	.post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu,
+#ifdef ASM_SUPPORTED
+	.patch_bundle = xen_patch_bundle,
+#endif
+	.patch_branch = xen_patch_branch,
+};
+
+/***************************************************************************
+ * pv_fsys_data
+ * addresses for fsys
+ */
+
+extern unsigned long xen_fsyscall_table[NR_syscalls];
+extern char xen_fsys_bubble_down[];
+struct pv_fsys_data xen_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)xen_fsyscall_table,
+	.fsys_bubble_down = (void *)xen_fsys_bubble_down,
+};
+
+/***************************************************************************
+ * pv_patchdata
+ * patchdata addresses
+ */
+
+#define DECLARE(name)							\
+	extern unsigned long __xen_start_gate_##name##_patchlist[];	\
+	extern unsigned long __xen_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __xen_start_gate_section[];
+
+#define ASSIGN(name)							\
+	.start_##name##_patchlist =					\
+		(unsigned long)__xen_start_gate_##name##_patchlist,	\
+	.end_##name##_patchlist =					\
+		(unsigned long)__xen_end_gate_##name##_patchlist
+
+static struct pv_patchdata xen_patchdata __initdata = {
+	ASSIGN(fsyscall),
+	ASSIGN(brl_fsys_bubble_down),
+	ASSIGN(vtop),
+	ASSIGN(mckinley_e9),
+
+	.gate_section = (void*)__xen_start_gate_section,
 };
 
 /***************************************************************************
@@ -170,6 +225,76 @@
  * intrinsics hooks.
  */
 
+#ifndef ASM_SUPPORTED
+static void
+xen_set_itm_with_offset(unsigned long val)
+{
+	/* ia64_cpu_local_tick() calls this with interrupts enabled. */
+	/* WARN_ON(!irqs_disabled()); */
+	xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+}
+
+static unsigned long
+xen_get_itm_with_offset(void)
+{
+	/* unused at this moment */
+	printk(KERN_DEBUG "%s is called.\n", __func__);
+
+	WARN_ON(!irqs_disabled());
+	return ia64_native_getreg(_IA64_REG_CR_ITM) +
+		XEN_MAPPEDREGS->itc_offset;
+}
+
+/* ia64_set_itc() is called only from
+ * cpu_init() (as ia64_set_itc(0)) and from ia64_sync_itc(),
+ * so XEN_MAPPEDREGS->itc_offset can be considered almost constant.
+ */
+static void
+xen_set_itc(unsigned long val)
+{
+	unsigned long mitc;
+
+	WARN_ON(!irqs_disabled());
+	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+	XEN_MAPPEDREGS->itc_offset = val - mitc;
+	XEN_MAPPEDREGS->itc_last = val;
+}
+
+static unsigned long
+xen_get_itc(void)
+{
+	unsigned long res;
+	unsigned long itc_offset;
+	unsigned long itc_last;
+	unsigned long ret_itc_last;
+
+	itc_offset = XEN_MAPPEDREGS->itc_offset;
+	do {
+		itc_last = XEN_MAPPEDREGS->itc_last;
+		res = ia64_native_getreg(_IA64_REG_AR_ITC);
+		res += itc_offset;
+		if (itc_last >= res)
+			res = itc_last + 1;
+		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+				       itc_last, res);
+	} while (unlikely(ret_itc_last != itc_last));
+	return res;
+
+#if 0
+	/* ia64_itc_udelay() calls ia64_get_itc() with interrupts enabled.
+	   Should it be paravirtualized instead? */
+	WARN_ON(!irqs_disabled());
+	itc_offset = XEN_MAPPEDREGS->itc_offset;
+	itc_last = XEN_MAPPEDREGS->itc_last;
+	res = ia64_native_getreg(_IA64_REG_AR_ITC);
+	res += itc_offset;
+	if (itc_last >= res)
+		res = itc_last + 1;
+	XEN_MAPPEDREGS->itc_last = res;
+	return res;
+#endif
+}
+
 static void xen_setreg(int regnum, unsigned long val)
 {
 	switch (regnum) {
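
xen_get_itc() above is the standard lock-free recipe for a shared, strictly
advancing clock: compute a candidate from the raw counter plus offset, clamp
it above the last published value, and publish with cmpxchg(), retrying when
another CPU wins the race. The same skeleton with the Xen specifics stripped
out (read_counter() and last_value are stand-ins, not kernel symbols):

static unsigned long last_value;
extern unsigned long read_counter(void);	/* raw counter + offset */

static unsigned long monotonic_clock(void)
{
	unsigned long last, res, old;

	do {
		last = last_value;
		res = read_counter();
		if (last >= res)	/* never step backwards or repeat */
			res = last + 1;
		old = cmpxchg(&last_value, last, res);
	} while (unlikely(old != last));
	return res;
}
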
@@ -181,11 +306,14 @@
 		xen_set_eflag(val);
 		break;
 #endif
+	case _IA64_REG_AR_ITC:
+		xen_set_itc(val);
+		break;
 	case _IA64_REG_CR_TPR:
 		xen_set_tpr(val);
 		break;
 	case _IA64_REG_CR_ITM:
-		xen_set_itm(val);
+		xen_set_itm_with_offset(val);
 		break;
 	case _IA64_REG_CR_EOI:
 		xen_eoi(val);
@@ -209,6 +337,12 @@
 		res = xen_get_eflag();
 		break;
 #endif
+	case _IA64_REG_AR_ITC:
+		res = xen_get_itc();
+		break;
+	case _IA64_REG_CR_ITM:
+		res = xen_get_itm_with_offset();
+		break;
 	case _IA64_REG_CR_IVR:
 		res = xen_get_ivr();
 		break;
@@ -259,8 +393,417 @@
 	else
 		xen_rsm_i();
 }
+#else
+#define __DEFINE_FUNC(name, code)					\
+	extern const char xen_ ## name ## _direct_start[];		\
+	extern const char xen_ ## name ## _direct_end[];		\
+	asm (".align 32\n"						\
+	     ".proc xen_" #name "\n"					\
+	     "xen_" #name ":\n"						\
+	     "xen_" #name "_direct_start:\n"				\
+	     code							\
+	     "xen_" #name "_direct_end:\n"				\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp xen_" #name "\n")
 
-static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+#define DEFINE_VOID_FUNC0(name, code)		\
+	extern void				\
+	xen_ ## name (void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)		\
+	extern void				\
+	xen_ ## name (unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)	\
+	extern void				\
+	xen_ ## name (void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)		\
+	extern void				\
+	xen_ ## name (unsigned long arg0,	\
+		      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	xen_ ## name (void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code)		\
+	extern unsigned long			\
+	xen_ ## name (type arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define XEN_PSR_I_ADDR_ADDR     (XSI_BASE + XSI_PSR_I_ADDR_OFS)
+
+/*
+ * static void xen_set_itm_with_offset(unsigned long val)
+ *        xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC1(set_itm_with_offset,
+		  "mov r2 = " __stringify(XSI_BASE) " + "
+		  __stringify(XSI_ITC_OFFSET_OFS) "\n"
+		  ";;\n"
+		  "ld8 r3 = [r2]\n"
+		  ";;\n"
+		  "sub r8 = r8, r3\n"
+		  "break " __stringify(HYPERPRIVOP_SET_ITM) "\n");
+
+/*
+ * static unsigned long xen_get_itm_with_offset(void)
+ *    return ia64_native_getreg(_IA64_REG_CR_ITM) + XEN_MAPPEDREGS->itc_offset;
+ */
+/* 2 bundles */
+DEFINE_FUNC0(get_itm_with_offset,
+	     "mov r2 = " __stringify(XSI_BASE) " + "
+	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
+	     ";;\n"
+	     "ld8 r3 = [r2]\n"
+	     "mov r8 = cr.itm\n"
+	     ";;\n"
+	     "add r8 = r8, r2\n");
+
+/*
+ * static void xen_set_itc(unsigned long val)
+ *	unsigned long mitc;
+ *
+ *	WARN_ON(!irqs_disabled());
+ *	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+ *	XEN_MAPPEDREGS->itc_offset = val - mitc;
+ *	XEN_MAPPEDREGS->itc_last = val;
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC1(set_itc,
+		  "mov r2 = " __stringify(XSI_BASE) " + "
+		  __stringify(XSI_ITC_LAST_OFS) "\n"
+		  "mov r3 = ar.itc\n"
+		  ";;\n"
+		  "sub r3 = r8, r3\n"
+		  "st8 [r2] = r8, "
+		  __stringify(XSI_ITC_LAST_OFS) " - "
+		  __stringify(XSI_ITC_OFFSET_OFS) "\n"
+		  ";;\n"
+		  "st8 [r2] = r3\n");
+
+/*
+ * static unsigned long xen_get_itc(void)
+ *	unsigned long res;
+ *	unsigned long itc_offset;
+ *	unsigned long itc_last;
+ *	unsigned long ret_itc_last;
+ *
+ *	itc_offset = XEN_MAPPEDREGS->itc_offset;
+ *	do {
+ *		itc_last = XEN_MAPPEDREGS->itc_last;
+ *		res = ia64_native_getreg(_IA64_REG_AR_ITC);
+ *		res += itc_offset;
+ *		if (itc_last >= res)
+ *			res = itc_last + 1;
+ *		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+ *				       itc_last, res);
+ *	} while (unlikely(ret_itc_last != itc_last));
+ *	return res;
+ */
+/* 5 bundles */
+DEFINE_FUNC0(get_itc,
+	     "mov r2 = " __stringify(XSI_BASE) " + "
+	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
+	     ";;\n"
+	     "ld8 r9 = [r2], " __stringify(XSI_ITC_LAST_OFS) " - "
+	     __stringify(XSI_ITC_OFFSET_OFS) "\n"
+					/* r9 = itc_offset */
+					/* r2 = XSI_ITC_OFFSET */
+	     "888:\n"
+	     "mov r8 = ar.itc\n"	/* res = ar.itc */
+	     ";;\n"
+	     "ld8 r3 = [r2]\n"		/* r3 = itc_last */
+	     "add r8 = r8, r9\n"	/* res = ar.itc + itc_offset */
+	     ";;\n"
+	     "cmp.gtu p6, p0 = r3, r8\n"
+	     ";;\n"
+	     "(p6) add r8 = 1, r3\n"	/* if (itc_last > res) itc_last + 1 */
+	     ";;\n"
+	     "mov ar.ccv = r8\n"
+	     ";;\n"
+	     "cmpxchg8.acq r10 = [r2], r8, ar.ccv\n"
+	     ";;\n"
+	     "cmp.ne p6, p0 = r10, r3\n"
+	     "(p6) hint @pause\n"
+	     "(p6) br.cond.spnt 888b\n");
+
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "break " __stringify(HYPERPRIVOP_FC) "\n");
+
+/*
+ * psr_i_addr_addr = XEN_PSR_I_ADDR_ADDR
+ * masked_addr = *psr_i_addr_addr
+ * pending_intr_addr = masked_addr - 1
+ * if (val & IA64_PSR_I) {
+ *   masked = *masked_addr
+ *   *masked_addr = 0:xen_set_virtual_psr_i(1)
+ *   compiler barrier
+ *   if (masked) {
+ *      uint8_t pending = *pending_intr_addr;
+ *      if (pending)
+ *              XEN_HYPER_SSM_I
+ *   }
+ * } else {
+ *   *masked_addr = 1:xen_set_virtual_psr_i(0)
+ * }
+ */
+/* 6 bundles */
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  /* r8 = input value: 0 or IA64_PSR_I
+		   * p6 =  (flags & IA64_PSR_I)
+		   *    = if clause
+		   * p7 = !(flags & IA64_PSR_I)
+		   *    = else clause
+		   */
+		  "cmp.ne p6, p7 = r8, r0\n"
+		  "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+		  ";;\n"
+		  /* r9 = XEN_PSR_I_ADDR */
+		  "ld8 r9 = [r9]\n"
+		  ";;\n"
+
+		  /* r10 = masked previous value */
+		  "(p6)	ld1.acq r10 = [r9]\n"
+		  ";;\n"
+
+		  /* p8 = were interrupts masked previously? */
+		  "(p6)	cmp.ne.unc p8, p0 = r10, r0\n"
+
+		  /* p7 = else clause */
+		  "(p7)	mov r11 = 1\n"
+		  ";;\n"
+		  /* masked = 1 */
+		  "(p7)	st1.rel [r9] = r11\n"
+
+		  /* p6 = if clause */
+		  /* masked = 0
+		   * r9 = masked_addr - 1
+		   *    = pending_intr_addr
+		   */
+		  "(p8)	st1.rel [r9] = r0, -1\n"
+		  ";;\n"
+		  /* r8 = pending_intr */
+		  "(p8)	ld1.acq r11 = [r9]\n"
+		  ";;\n"
+		  /* p9 = interrupt pending? */
+		  "(p8)	cmp.ne.unc p9, p10 = r11, r0\n"
+		  ";;\n"
+		  "(p10) mf\n"
+		  /* issue hypercall to trigger interrupt */
+		  "(p9)	break " __stringify(HYPERPRIVOP_SSM_I) "\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "break " __stringify(HYPERPRIVOP_PTC_GA) "\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "break " __stringify(HYPERPRIVOP_SET_RR) "\n");
+
+/*
+ * tmp = XEN_MAPPEDREGS->interrupt_mask_addr = XEN_PSR_I_ADDR_ADDR;
+ * tmp = *tmp
+ * tmp = *tmp;
+ * psr_i = tmp? 0: IA64_PSR_I;
+ */
+/* 4 bundles */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+	     ";;\n"
+	     "ld8 r9 = [r9]\n"			/* r9 = XEN_PSR_I_ADDR */
+	     "mov r8 = 0\n"			/* psr_i = 0 */
+	     ";;\n"
+	     "ld1.acq r9 = [r9]\n"		/* r9 = XEN_PSR_I */
+	     ";;\n"
+	     "cmp.eq.unc p6, p0 = r9, r0\n"	/* p6 = (XEN_PSR_I != 0) */
+	     ";;\n"
+	     "(p6) mov r8 = " __stringify(1 << IA64_PSR_I_BIT) "\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "break " __stringify(HYPERPRIVOP_THASH) "\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "break " __stringify(HYPERPRIVOP_GET_CPUID) "\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "break " __stringify(HYPERPRIVOP_GET_PMD) "\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "break " __stringify(HYPERPRIVOP_GET_RR) "\n");
+
+/*
+ * void xen_privop_ssm_i(void)
+ *
+ * int masked = !xen_get_virtual_psr_i();
+ *	// masked = *(*XEN_MAPPEDREGS->interrupt_mask_addr)
+ * xen_set_virtual_psr_i(1)
+ *	// *(*XEN_MAPPEDREGS->interrupt_mask_addr) = 0
+ * // compiler barrier
+ * if (masked) {
+ *	uint8_t* pend_int_addr =
+ *		(uint8_t*)(*XEN_MAPPEDREGS->interrupt_mask_addr) - 1;
+ *	uint8_t pending = *pend_int_addr;
+ *	if (pending)
+ *		XEN_HYPER_SSM_I
+ * }
+ */
+/* 4 bundles */
+DEFINE_VOID_FUNC0(ssm_i,
+		  "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+		  ";;\n"
+		  "ld8 r8 = [r8]\n"		/* r8 = XEN_PSR_I_ADDR */
+		  ";;\n"
+		  "ld1.acq r9 = [r8]\n"		/* r9 = XEN_PSR_I */
+		  ";;\n"
+		  "st1.rel [r8] = r0, -1\n"	/* psr_i = 0. enable interrupt
+						 * r8 = XEN_PSR_I_ADDR - 1
+						 *    = pend_int_addr
+						 */
+		  "cmp.eq.unc p0, p6 = r9, r0\n"/* p6 = !XEN_PSR_I
+						 * previously interrupt
+						 * masked?
+						 */
+		  ";;\n"
+		  "(p6) ld1.acq r8 = [r8]\n"	/* r8 = xen_pend_int */
+		  ";;\n"
+		  "(p6) cmp.eq.unc p6, p7 = r8, r0\n"	/*interrupt pending?*/
+		  ";;\n"
+		  /* issue hypercall to get interrupt */
+		  "(p7) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
+		  ";;\n");
+
+/*
+ * psr_i_addr_addr = XEN_MAPPEDREGS->interrupt_mask_addr
+ *		   = XEN_PSR_I_ADDR_ADDR;
+ * psr_i_addr = *psr_i_addr_addr;
+ * *psr_i_addr = 1;
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC0(rsm_i,
+		  "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+						/* r8 = XEN_PSR_I_ADDR */
+		  "mov r9 = 1\n"
+		  ";;\n"
+		  "ld8 r8 = [r8]\n"		/* r8 = XEN_PSR_I */
+		  ";;\n"
+		  "st1.rel [r8] = r9\n");	/* XEN_PSR_I = 1 */
+
+extern void
+xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+		   unsigned long val2, unsigned long val3,
+		   unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n");
+
+
+extern unsigned long xen_getreg(int regnum);
+#define __DEFINE_GET_REG(id, privop)					\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"			\
+	";;\n"								\
+	"cmp.eq p6, p0 = r2, r8\n"					\
+	";;\n"								\
+	"(p6) break " __stringify(HYPERPRIVOP_GET_ ## privop) "\n"	\
+	"(p6) br.cond.sptk.many b6\n"					\
+	";;\n"
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(PSR, PSR)
+#ifdef CONFIG_IA32_SUPPORT
+	      __DEFINE_GET_REG(AR_EFLAG, EFLAG)
+#endif
+
+	      /* get_itc */
+	      "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_get_itc\n"
+	      ";;\n"
+
+	      /* get itm */
+	      "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_get_itm_with_offset\n"
+	      ";;\n"
+
+	      __DEFINE_GET_REG(CR_IVR, IVR)
+	      __DEFINE_GET_REG(CR_TPR, TPR)
+
+	      /* fall back */
+	      "movl r2 = ia64_native_getreg_func\n"
+	      ";;\n"
+	      "mov b7 = r2\n"
+	      ";;\n"
+	      "br.cond.sptk.many b7\n");
+
+extern void xen_setreg(int regnum, unsigned long val);
+#define __DEFINE_SET_REG(id, privop)					\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"			\
+	";;\n"								\
+	"cmp.eq p6, p0 = r2, r9\n"					\
+	";;\n"								\
+	"(p6) break " __stringify(HYPERPRIVOP_ ## privop) "\n"		\
+	"(p6) br.cond.sptk.many b6\n"					\
+	";;\n"
+
+__DEFINE_FUNC(setreg,
+	      /* kr0 .. kr 7*/
+	      /*
+	       * if (_IA64_REG_AR_KR0 <= regnum &&
+	       *     regnum <= _IA64_REG_AR_KR7) {
+	       *     register __index asm ("r8") = regnum - _IA64_REG_AR_KR0
+	       *     register __val asm ("r9") = val
+	       *    "break HYPERPRIVOP_SET_KR"
+	       * }
+	       */
+	      "mov r17 = r9\n"
+	      "mov r2 = " __stringify(_IA64_REG_AR_KR0) "\n"
+	      ";;\n"
+	      "cmp.ge p6, p0 = r9, r2\n"
+	      "sub r17 = r17, r2\n"
+	      ";;\n"
+	      "(p6) cmp.ge.unc p7, p0 = "
+	      __stringify(_IA64_REG_AR_KR7) " - " __stringify(_IA64_REG_AR_KR0)
+	      ", r17\n"
+	      ";;\n"
+	      "(p7) mov r9 = r8\n"
+	      ";;\n"
+	      "(p7) mov r8 = r17\n"
+	      "(p7) break " __stringify(HYPERPRIVOP_SET_KR) "\n"
+
+	      /* set itm */
+	      "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_set_itm_with_offset\n"
+
+	      /* set itc */
+	      "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r8\n"
+	      ";;\n"
+	      "(p6) br.cond.spnt xen_set_itc\n"
+
+#ifdef CONFIG_IA32_SUPPORT
+	      __DEFINE_SET_REG(AR_EFLAG, SET_EFLAG)
+#endif
+	      __DEFINE_SET_REG(CR_TPR, SET_TPR)
+	      __DEFINE_SET_REG(CR_EOI, EOI)
+
+	      /* fall back */
+	      "movl r2 = ia64_native_setreg_func\n"
+	      ";;\n"
+	      "mov b7 = r2\n"
+	      ";;\n"
+	      "br.cond.sptk.many b7\n");
+#endif
+
+static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.fc		= xen_fc,
 	.thash		= xen_thash,
 	.get_cpuid	= xen_get_cpuid,
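
For reference, the approximate preprocessor expansion of
DEFINE_VOID_FUNC0(rsm_i, ...) defined above: each stub is a 32-byte-aligned
asm procedure whose payload sits between the _direct_start/_direct_end
markers (the span the bundle patcher copies in place), while the trailing
branch through b6 returns to the caller in the unpatched, indirect-call case:

/* approximate expansion, derived from __DEFINE_FUNC above */
extern void xen_rsm_i(void);
extern const char xen_rsm_i_direct_start[];
extern const char xen_rsm_i_direct_end[];
asm (".align 32\n"
     ".proc xen_rsm_i\n"
     "xen_rsm_i:\n"
     "xen_rsm_i_direct_start:\n"
     "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
     "mov r9 = 1\n"
     ";;\n"
     "ld8 r8 = [r8]\n"
     ";;\n"
     "st1.rel [r8] = r9\n"
     "xen_rsm_i_direct_end:\n"
     "br.cond.sptk.many b6\n"
     ".endp xen_rsm_i\n");
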
@@ -337,7 +880,7 @@
 	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
 }
 
-static const struct pv_iosapic_ops xen_iosapic_ops __initconst = {
+static struct pv_iosapic_ops xen_iosapic_ops __initdata = {
 	.pcat_compat_init = xen_pcat_compat_init,
 	.__get_irq_chip = xen_iosapic_get_irq_chip,
 
@@ -355,6 +898,8 @@
 	xen_info_init();
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
+	pv_fsys_data = xen_fsys_data;
+	pv_patchdata = xen_patchdata;
 	pv_cpu_ops = xen_cpu_ops;
 	pv_iosapic_ops = xen_iosapic_ops;
 	pv_irq_ops = xen_irq_ops;
@@ -362,3 +907,252 @@
 
 	paravirt_cpu_asm_init(&xen_cpu_asm_switch);
 }
+
+#ifdef ASM_SUPPORTED
+/***************************************************************************
+ * binary patching
+ * pv_init_ops.patch_bundle
+ */
+
+#define DEFINE_FUNC_GETREG(name, privop)				\
+	DEFINE_FUNC0(get_ ## name,					\
+		     "break "__stringify(HYPERPRIVOP_GET_ ## privop) "\n")
+
+DEFINE_FUNC_GETREG(psr, PSR);
+DEFINE_FUNC_GETREG(eflag, EFLAG);
+DEFINE_FUNC_GETREG(ivr, IVR);
+DEFINE_FUNC_GETREG(tpr, TPR);
+
+#define DEFINE_FUNC_SET_KR(n)						\
+	DEFINE_VOID_FUNC0(set_kr ## n,					\
+			  ";;\n"					\
+			  "mov r9 = r8\n"				\
+			  "mov r8 = " #n "\n"				\
+			  "break " __stringify(HYPERPRIVOP_SET_KR) "\n")
+
+DEFINE_FUNC_SET_KR(0);
+DEFINE_FUNC_SET_KR(1);
+DEFINE_FUNC_SET_KR(2);
+DEFINE_FUNC_SET_KR(3);
+DEFINE_FUNC_SET_KR(4);
+DEFINE_FUNC_SET_KR(5);
+DEFINE_FUNC_SET_KR(6);
+DEFINE_FUNC_SET_KR(7);
+
+#define __DEFINE_FUNC_SETREG(name, privop)				\
+	DEFINE_VOID_FUNC0(name,						\
+			  "break "__stringify(HYPERPRIVOP_ ## privop) "\n")
+
+#define DEFINE_FUNC_SETREG(name, privop)			\
+	__DEFINE_FUNC_SETREG(set_ ## name, SET_ ## privop)
+
+DEFINE_FUNC_SETREG(eflag, EFLAG);
+DEFINE_FUNC_SETREG(tpr, TPR);
+__DEFINE_FUNC_SETREG(eoi, EOI);
+
+extern const char xen_check_events[];
+extern const char __xen_intrin_local_irq_restore_direct_start[];
+extern const char __xen_intrin_local_irq_restore_direct_end[];
+extern const unsigned long __xen_intrin_local_irq_restore_direct_reloc;
+
+asm (
+	".align 32\n"
+	".proc xen_check_events\n"
+	"xen_check_events:\n"
+	/* masked = 0
+	 * r9 = masked_addr - 1
+	 *    = pending_intr_addr
+	 */
+	"st1.rel [r9] = r0, -1\n"
+	";;\n"
+	/* r8 = pending_intr */
+	"ld1.acq r11 = [r9]\n"
+	";;\n"
+	/* p9 = interrupt pending? */
+	"cmp.ne p9, p10 = r11, r0\n"
+	";;\n"
+	"(p10) mf\n"
+	/* issue hypercall to trigger interrupt */
+	"(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
+	"br.cond.sptk.many b6\n"
+	".endp xen_check_events\n"
+	"\n"
+	".align 32\n"
+	".proc __xen_intrin_local_irq_restore_direct\n"
+	"__xen_intrin_local_irq_restore_direct:\n"
+	"__xen_intrin_local_irq_restore_direct_start:\n"
+	"1:\n"
+	"{\n"
+	"cmp.ne p6, p7 = r8, r0\n"
+	"mov r17 = ip\n" /* get ip to calc return address */
+	"mov r9 = "__stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+	";;\n"
+	"}\n"
+	"{\n"
+	/* r9 = XEN_PSR_I_ADDR */
+	"ld8 r9 = [r9]\n"
+	";;\n"
+	/* r10 = masked previous value */
+	"(p6) ld1.acq r10 = [r9]\n"
+	"adds r17 =  1f - 1b, r17\n" /* calculate return address */
+	";;\n"
+	"}\n"
+	"{\n"
	/* p8 = were interrupts masked previously? */
+	"(p6) cmp.ne.unc p8, p0 = r10, r0\n"
+	"\n"
+	/* p7 = else clause */
+	"(p7) mov r11 = 1\n"
+	";;\n"
+	"(p8) mov b6 = r17\n" /* set return address */
+	"}\n"
+	"{\n"
+	/* masked = 1 */
+	"(p7) st1.rel [r9] = r11\n"
+	"\n"
+	"[99:]\n"
+	"(p8) brl.cond.dptk.few xen_check_events\n"
+	"}\n"
+	/* the pv calling stub is 5 bundles; pad with nops to adjust the return address */
+	"{\n"
+	"nop 0\n"
+	"nop 0\n"
+	"nop 0\n"
+	"}\n"
+	"1:\n"
+	"__xen_intrin_local_irq_restore_direct_end:\n"
+	".endp __xen_intrin_local_irq_restore_direct\n"
+	"\n"
+	".align 8\n"
+	"__xen_intrin_local_irq_restore_direct_reloc:\n"
+	"data8 99b\n"
+);
+
+static struct paravirt_patch_bundle_elem xen_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define XEN_PATCH_BUNDLE_ELEM(name, type)		\
+	{						\
+		(void*)xen_ ## name ## _direct_start,	\
+		(void*)xen_ ## name ## _direct_end,	\
+		PARAVIRT_PATCH_TYPE_ ## type,		\
+	}
+
+	XEN_PATCH_BUNDLE_ELEM(fc, FC),
+	XEN_PATCH_BUNDLE_ELEM(thash, THASH),
+	XEN_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+	XEN_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+	XEN_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+	XEN_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+	XEN_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+	XEN_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+	XEN_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+	XEN_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+	XEN_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+	{
+		(void*)__xen_intrin_local_irq_restore_direct_start,
+		(void*)__xen_intrin_local_irq_restore_direct_end,
+		PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE,
+	},
+
+#define XEN_PATCH_BUNDLE_ELEM_GETREG(name, reg)			\
+	{							\
+		xen_get_ ## name ## _direct_start,		\
+		xen_get_ ## name ## _direct_end,		\
+		PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \
+	}
+
+	XEN_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+	XEN_PATCH_BUNDLE_ELEM_GETREG(eflag, AR_EFLAG),
+
+	XEN_PATCH_BUNDLE_ELEM_GETREG(ivr, CR_IVR),
+	XEN_PATCH_BUNDLE_ELEM_GETREG(tpr, CR_TPR),
+
+	XEN_PATCH_BUNDLE_ELEM_GETREG(itc, AR_ITC),
+	XEN_PATCH_BUNDLE_ELEM_GETREG(itm_with_offset, CR_ITM),
+
+
+#define __XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg)		\
+	{							\
+		xen_ ## name ## _direct_start,			\
+		xen_ ## name ## _direct_end,			\
+		PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \
+	}
+
+#define XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg)			\
+	__XEN_PATCH_BUNDLE_ELEM_SETREG(set_ ## name, reg)
+
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr0, AR_KR0),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr1, AR_KR1),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr2, AR_KR2),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr3, AR_KR3),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr4, AR_KR4),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr5, AR_KR5),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr6, AR_KR6),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(kr7, AR_KR7),
+
+	XEN_PATCH_BUNDLE_ELEM_SETREG(eflag, AR_EFLAG),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(tpr, CR_TPR),
+	__XEN_PATCH_BUNDLE_ELEM_SETREG(eoi, CR_EOI),
+
+	XEN_PATCH_BUNDLE_ELEM_SETREG(itc, AR_ITC),
+	XEN_PATCH_BUNDLE_ELEM_SETREG(itm_with_offset, CR_ITM),
+};
+
+static unsigned long __init_or_module
+xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+	const unsigned long nelems = sizeof(xen_patch_bundle_elems) /
+		sizeof(xen_patch_bundle_elems[0]);
+	unsigned long used;
+	const struct paravirt_patch_bundle_elem *found;
+
+	used = __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+					     xen_patch_bundle_elems, nelems,
+					     &found);
+
+	if (found == NULL)
+		/* fallback */
+		return ia64_native_patch_bundle(sbundle, ebundle, type);
+	if (used == 0)
+		return used;
+
+	/* relocation */
+	switch (type) {
+	case PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE: {
+		unsigned long reloc =
+			__xen_intrin_local_irq_restore_direct_reloc;
+		unsigned long reloc_offset = reloc - (unsigned long)
+			__xen_intrin_local_irq_restore_direct_start;
+		unsigned long tag = (unsigned long)sbundle + reloc_offset;
+		paravirt_patch_reloc_brl(tag, xen_check_events);
+		break;
+	}
+	default:
+		/* nothing */
+		break;
+	}
+	return used;
+}
+#endif /* ASM_SUPPORTED */
+
+const struct paravirt_patch_branch_target xen_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type)			\
+	{						\
+		&xen_ ## name,				\
+		PARAVIRT_PATCH_TYPE_BR_ ## type,	\
+	}
+	PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+	PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+	PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+	PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+xen_patch_branch(unsigned long tag, unsigned long type)
+{
+	const unsigned long nelem =
+		sizeof(xen_branch_target) / sizeof(xen_branch_target[0]);
+	__paravirt_patch_apply_branch(tag, type, xen_branch_target, nelem);
+}