Re-add the arch ffz functions

We may as well use the single-op versions where we have them; they
are generally a lot faster than generic_ffz().

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/arch/arch-ppc.h b/arch/arch-ppc.h
index 4d77105..6256d05 100644
--- a/arch/arch-ppc.h
+++ b/arch/arch-ppc.h
@@ -28,4 +28,20 @@
 	__asm__ __volatile__ ("sync" : : : "memory")
 #endif
 
+static inline int __ilog2(unsigned long bitmask)
+{
+	int nlz;	/* leading-zero count written by cntlzw */
+	/* NOTE(review): cntlzw reads only the low 32 bits; confirm 64-bit use. */
+	asm ("cntlzw %0,%1" : "=r" (nlz) : "r" (bitmask));
+	return 31 - nlz;	/* turn the count into a bit index from bit 0 */
+}
+
+static inline int arch_ffz(unsigned long bitmask)
+{
+	unsigned long inv = ~bitmask;	/* set bits here are zeros in bitmask */
+
+	return inv ? __ilog2(inv & -inv) : 32;	/* inv & -inv keeps lowest set bit */
+}
+#define ARCH_HAVE_FFZ	/* this header provides arch_ffz() */
+
 #endif