x86: Fix oprofile compilation in pic mode.

This patch modifies the inline x86 assembly fragments to properly
save ebx when calling the cpuid instruction. Otherwise, the compiler
will error, complaining that the PIC register (ebx) is clobbered.

Change-Id: Ibc091484068dfb87812c59dbc3976d26d922eebc
diff --git a/libop/op_hw_specific.h b/libop/op_hw_specific.h
index 35080ad..27c44ec 100644
--- a/libop/op_hw_specific.h
+++ b/libop/op_hw_specific.h
@@ -20,10 +20,43 @@
 		char v[12];
 	} v;
 	unsigned eax;
+#ifdef __PIC__
+        __asm__ __volatile__(
+            "pushl %%ebx\n"      /* must be preserved due to PIC code */
+            "cpuid\n"
+            "mov %%ebx, 0(%%edi)\n"
+            "mov %%ecx, 4(%%edi)\n"
+            "mov %%edx, 8(%%edi)\n"
+            "popl %%ebx\n"
+            : "=a" (eax)
+            : "a"(0), "D"(v.v)
+            : "%ecx", "%edx"
+        );
+#else
 	asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0));
+#endif
 	return !strncmp(v.v, vnd, 12);
 }
 
+static inline unsigned arch_cpuid_1(int code)
+{
+    unsigned val;
+#ifdef __PIC__
+        __asm__ __volatile__ (
+            "pushl %%ebx\n"
+            "cpuid\n"
+            "popl %%ebx\n"
+            : "=a" (val)
+            : "a" (code)
+            : "ecx", "edx"
+        );
+#else
+        asm("cpuid" : "=a" (v.eax) : "a" (code) : "ecx","ebx","edx");
+#endif
+        return val;
+}
+
+
 /* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates
    unhalted reference cycle architectural event is supported. We assume
    steppings after C0 report correct data in CPUID. */
@@ -46,7 +79,7 @@
 
 	if (!cpuid_vendor("GenuineIntel"))
 		return;
-	asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx");
+        arch_cpuid_1(1);
 	model = (v.ext_model << 4) + v.model;
 	if (v.family != 6 || model != 26 || v.stepping > 4)
 		return;
@@ -57,7 +90,19 @@
 {
 	if (cpu_type == CPU_ARCH_PERFMON) { 
 		unsigned ebx, eax;
+#ifdef __PIC__
+                __asm__ __volatile__ (
+                    "pushl %%ebx\n"
+                    "cpuid\n"
+                    "mov %%ebx, %%ecx\n"
+                    "popl %%ebx"
+                    : "=a" (eax), "=c" (ebx)
+                    : "a" (0xa)
+                    : "edx"
+                );
+#else
 		asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx");
+#endif
 		workaround_nehalem_aaj79(&ebx);
 		return ebx & num_to_mask(eax >> 24);
 	}
@@ -67,8 +112,7 @@
 static inline int arch_num_counters(op_cpu cpu_type) 
 {
 	if (cpu_type == CPU_ARCH_PERFMON) {
-		unsigned v;
-		asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
+		unsigned v = arch_cpuid_1(0xa);
 		return (v >> 8) & 0xff;
 	} 
 	return -1;
@@ -76,9 +120,8 @@
 
 static inline unsigned arch_get_counter_mask(void)
 {
-	unsigned v;
-	asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx");
-	return num_to_mask((v >> 8) & 0xff);	
+	unsigned v = arch_cpuid_1(0xa);
+	return num_to_mask((v >> 8) & 0xff);
 }
 
 #else