add word-sized ctz function to atomic.h

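strictly speaking, this and a few other ops should be factored into
asm.h, or the file should just be renamed to asm.h, but whatever. clean
it up someday. also fix the misspelled include guard
(_INTERNAA_ATOMIC_H -> _INTERNAL_ATOMIC_H) in both headers.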
diff --git a/arch/i386/atomic.h b/arch/i386/atomic.h
index bf3c336..66059af 100644
--- a/arch/i386/atomic.h
+++ b/arch/i386/atomic.h
@@ -1,5 +1,5 @@
-#ifndef _INTERNAA_ATOMIC_H
-#define _INTERNAA_ATOMIC_H
+#ifndef _INTERNAL_ATOMIC_H
+#define _INTERNAL_ATOMIC_H
 
 #include <stdint.h>
 
@@ -11,6 +11,12 @@
 	return r;
 }
 
+static inline int a_ctz_l(unsigned long x) /* count trailing zeros of a word-sized value */
+{
+	long r; /* output operand %0 of the asm below */
+	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) ); /* bsf: index of lowest set bit of x; NOTE(review): per Intel SDM the result is undefined for x==0 -- callers should pass nonzero x */
+	return r; /* narrowed from long to the int return type */
+}
 
 static inline void a_and_64(volatile uint64_t *p, uint64_t v)
 {
diff --git a/arch/x86_64/atomic.h b/arch/x86_64/atomic.h
index 04f6c28..3235db1 100644
--- a/arch/x86_64/atomic.h
+++ b/arch/x86_64/atomic.h
@@ -1,5 +1,5 @@
-#ifndef _INTERNAA_ATOMIC_H
-#define _INTERNAA_ATOMIC_H
+#ifndef _INTERNAL_ATOMIC_H
+#define _INTERNAL_ATOMIC_H
 
 #include <stdint.h>
 
@@ -10,6 +10,12 @@
 	return r;
 }
 
+static inline int a_ctz_l(unsigned long x) /* count trailing zeros of a word-sized value */
+{
+	long r; /* output operand %0 of the asm below */
+	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) ); /* bsf: index of lowest set bit of x; NOTE(review): per Intel SDM the result is undefined for x==0 -- callers should pass nonzero x */
+	return r; /* narrowed from long to the int return type */
+}
 
 static inline void a_and_64(volatile uint64_t *p, uint64_t v)
 {