allow implementing a_cas_p with pointer-sized ll/sc

No current ports do this, but it will be useful for porting to 64-bit ll/sc
architectures, such as mips64 and powerpc64.
diff --git a/src/internal/atomic.h b/src/internal/atomic.h
index 2097247..6f37d25 100644
--- a/src/internal/atomic.h
+++ b/src/internal/atomic.h
@@ -82,6 +82,23 @@
 
 #endif
 
+#ifdef a_ll_p
+
+#ifndef a_cas_p
+#define a_cas_p a_cas_p
+static inline void *a_cas_p(volatile void *p, void *t, void *s)
+{
+	void *old;
+	a_pre_llsc();
+	do old = a_ll_p(p);
+	while (old==t && !a_sc_p(p, s));
+	a_post_llsc();
+	return old;
+}
+#endif
+
+#endif
+
 #ifndef a_cas
 #error missing definition of a_cas
 #endif
@@ -209,6 +226,7 @@
 #endif
 
 #ifndef a_cas_p
+typedef char a_cas_p_undefined_but_pointer_not_32bit[-sizeof(char) == 0xffffffff ? 1 : -1];
 #define a_cas_p a_cas_p
 static inline void *a_cas_p(volatile void *p, void *t, void *s)
 {