x86_64: initialize this_cpu_off to __per_cpu_load

On x86_64, if get_per_cpu_var() is used before per cpu area is setup
(if lockdep is turned on, it happens), it needs this_cpu_off to point
to __per_cpu_load.  Initialize accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 84395fa..7e15781 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -3,8 +3,13 @@
  */
 #include <linux/module.h>
 #include <asm/smp.h>
+#include <asm/sections.h>
 
+#ifdef CONFIG_X86_64
+DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load;
+#else
 DEFINE_PER_CPU(unsigned long, this_cpu_off);
+#endif
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
 #ifdef CONFIG_X86_32