Re-add the arch ffz functions

We may as well use the single-op versions where we have them; they are
generally a lot faster than generic_ffz().

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h
index b49fd5f..972c623 100644
--- a/arch/arch-x86_64.h
+++ b/arch/arch-x86_64.h
@@ -32,4 +32,11 @@
 #define nop		__asm__ __volatile__("rep;nop": : :"memory")
 #define read_barrier()	asm volatile("lfence":::"memory")
 
+static inline unsigned long arch_ffz(unsigned long bitmask) /* x86-64 find-first-zero: returns the bsfq result for ~bitmask */
+{ /* NOTE(review): when bitmask has no zero bit, ~bitmask is 0 and BSF's destination is documented as undefined -- presumably callers guarantee a zero bit; confirm */
+	__asm__("bsfq %1,%0" :"=r" (bitmask) :"r" (~bitmask)); /* %1 = complemented input; %0 = result, written back into bitmask */
+	return bitmask;
+}
+#define ARCH_HAVE_FFZ /* defined with no value; presumably tested elsewhere to prefer arch_ffz() over generic_ffz() -- confirm */
+
 #endif