Merge "Fix cpu_set_t"
diff --git a/libc/bionic/sched_cpucount.c b/libc/bionic/sched_cpucount.c
index 9458dc8..2ea1d3f 100644
--- a/libc/bionic/sched_cpucount.c
+++ b/libc/bionic/sched_cpucount.c
@@ -28,13 +28,14 @@
 #define _GNU_SOURCE 1
 #include <sched.h>
 
-int __sched_cpucount(size_t setsize, cpu_set_t* set)
-{
-    int nn = 0, nn_max = setsize / sizeof(__CPU_BITTYPE);
-    int count = 0;
+int __sched_cpucount(size_t setsize, cpu_set_t* set) {
+  int nn = 0;
+  int nn_max = setsize / sizeof(__CPU_BITTYPE);
+  int count = 0;
 
-    for ( ; nn < nn_max; nn++ )
-        count += __builtin_popcount(set->__bits[nn]);
+  for ( ; nn < nn_max; nn++ ) {
+    count += __builtin_popcountl(set->__bits[nn]);
+  }
 
-    return count;
+  return count;
 }
diff --git a/libc/include/sched.h b/libc/include/sched.h
index 7649e83..68115bb 100644
--- a/libc/include/sched.h
+++ b/libc/include/sched.h
@@ -59,151 +59,90 @@
 extern int sched_getcpu(void);
 extern int setns(int, int);
 
-/* Our implementation supports up to 32 independent CPUs, which is also
- * the maximum supported by the kernel at the moment. GLibc uses 1024 by
- * default.
- *
- * If you want to use more than that, you should use CPU_ALLOC() / CPU_FREE()
- * and the CPU_XXX_S() macro variants.
- */
-#define CPU_SETSIZE   32
+#ifdef __LP64__
+#define CPU_SETSIZE 1024
+#else
+#define CPU_SETSIZE 32
+#endif
 
-#define __CPU_BITTYPE    unsigned long int  /* mandated by the kernel  */
-#define __CPU_BITSHIFT   5                  /* should be log2(BITTYPE) */
-#define __CPU_BITS       (1 << __CPU_BITSHIFT)
-#define __CPU_ELT(x)     ((x) >> __CPU_BITSHIFT)
-#define __CPU_MASK(x)    ((__CPU_BITTYPE)1 << ((x) & (__CPU_BITS-1)))
+#define __CPU_BITTYPE  unsigned long int  /* mandated by the kernel  */
+#define __CPU_BITS     (8 * sizeof(__CPU_BITTYPE))
+#define __CPU_ELT(x)   ((x) / __CPU_BITS)
+#define __CPU_MASK(x)  ((__CPU_BITTYPE)1 << ((x) & (__CPU_BITS - 1)))
 
 typedef struct {
-    __CPU_BITTYPE  __bits[ CPU_SETSIZE / __CPU_BITS ];
+  __CPU_BITTYPE  __bits[ CPU_SETSIZE / __CPU_BITS ];
 } cpu_set_t;
 
 extern int sched_setaffinity(pid_t pid, size_t setsize, const cpu_set_t* set);
 
 extern int sched_getaffinity(pid_t pid, size_t setsize, cpu_set_t* set);
 
-/* Provide optimized implementation for 32-bit cpu_set_t */
-#if CPU_SETSIZE == __CPU_BITS
+#define CPU_ZERO(set)          CPU_ZERO_S(sizeof(cpu_set_t), set)
+#define CPU_SET(cpu, set)      CPU_SET_S(cpu, sizeof(cpu_set_t), set)
+#define CPU_CLR(cpu, set)      CPU_CLR_S(cpu, sizeof(cpu_set_t), set)
+#define CPU_ISSET(cpu, set)    CPU_ISSET_S(cpu, sizeof(cpu_set_t), set)
+#define CPU_COUNT(set)         CPU_COUNT_S(sizeof(cpu_set_t), set)
+#define CPU_EQUAL(set1, set2)  CPU_EQUAL_S(sizeof(cpu_set_t), set1, set2)
 
-#  define CPU_ZERO(set_)   \
-    do{ \
-        (set_)->__bits[0] = 0; \
-    }while(0)
+#define CPU_AND(dst, set1, set2)  __CPU_OP(dst, set1, set2, &)
+#define CPU_OR(dst, set1, set2)   __CPU_OP(dst, set1, set2, |)
+#define CPU_XOR(dst, set1, set2)  __CPU_OP(dst, set1, set2, ^)
 
-#  define CPU_SET(cpu_,set_) \
-    do {\
-        size_t __cpu = (cpu_); \
-        if (__cpu < CPU_SETSIZE) \
-            (set_)->__bits[0] |= __CPU_MASK(__cpu); \
-    }while (0)
-
-#  define CPU_CLR(cpu_,set_) \
-    do {\
-        size_t __cpu = (cpu_); \
-        if (__cpu < CPU_SETSIZE) \
-            (set_)->__bits[0] &= ~__CPU_MASK(__cpu); \
-    }while (0)
-
-#  define CPU_ISSET(cpu_, set_) \
-    (__extension__({\
-        size_t  __cpu = (cpu_); \
-        (cpu_ < CPU_SETSIZE) \
-            ? ((set_)->__bits[0] & __CPU_MASK(__cpu)) != 0 \
-            : 0; \
-    }))
-
-#  define CPU_EQUAL(set1_, set2_) \
-    ((set1_)->__bits[0] == (set2_)->__bits[0])
-
-#  define __CPU_OP(dst_, set1_, set2_, op_) \
-    do { \
-        (dst_)->__bits[0] = (set1_)->__bits[0] op_ (set2_)->__bits[0]; \
-    } while (0)
-
-#  define CPU_COUNT(set_)  __builtin_popcountl((set_)->__bits[0])
-
-#else /* CPU_SETSIZE != __CPU_BITS */
-
-#  define CPU_ZERO(set_)          CPU_ZERO_S(sizeof(cpu_set_t), set_)
-#  define CPU_SET(cpu_,set_)      CPU_SET_S(cpu_,sizeof(cpu_set_t),set_)
-#  define CPU_CLR(cpu_,set_)      CPU_CLR_S(cpu_,sizeof(cpu_set_t),set_)
-#  define CPU_ISSET(cpu_,set_)    CPU_ISSET_S(cpu_,sizeof(cpu_set_t),set_)
-#  define CPU_COUNT(set_)         CPU_COUNT_S(sizeof(cpu_set_t),set_)
-#  define CPU_EQUAL(set1_,set2_)  CPU_EQUAL_S(sizeof(cpu_set_t),set1_,set2_)
-
-#  define __CPU_OP(dst_,set1_,set2_,op_)  __CPU_OP_S(sizeof(cpu_set_t),dst_,set1_,set2_,op_)
-
-#endif /* CPU_SETSIZE != __CPU_BITS */
-
-#define CPU_AND(set1_,set2_)   __CPU_OP(set1_,set2_,&)
-#define CPU_OR(set1_,set2_)    __CPU_OP(set1_,set2_,|)
-#define CPU_XOR(set1_,set2_)   __CPU_OP(set1_,set2_,^)
+#define __CPU_OP(dst, set1, set2, op)  __CPU_OP_S(sizeof(cpu_set_t), dst, set1, set2, op)
 
 /* Support for dynamically-allocated cpu_set_t */
 
 #define CPU_ALLOC_SIZE(count) \
-    __CPU_ELT((count) + (__CPU_BITS-1))*sizeof(__CPU_BITTYPE)
+  __CPU_ELT((count) + (__CPU_BITS - 1)) * sizeof(__CPU_BITTYPE)
 
-#define CPU_ALLOC(count)   __sched_cpualloc((count));
-#define CPU_FREE(set)      __sched_cpufree((set))
+#define CPU_ALLOC(count)  __sched_cpualloc((count))
+#define CPU_FREE(set)     __sched_cpufree((set))
 
 extern cpu_set_t* __sched_cpualloc(size_t count);
 extern void       __sched_cpufree(cpu_set_t* set);
 
-#define CPU_ZERO_S(setsize_,set_)  \
-    do { \
-        size_t __nn = 0; \
-        size_t __nn_max = (setsize_)/sizeof(__CPU_BITTYPE); \
-        for (; __nn < __nn_max; __nn++) \
-            (set_)->__bits[__nn] = 0; \
-    } while (0)
+#define CPU_ZERO_S(setsize, set)  __builtin_memset(set, 0, setsize)
 
-#define CPU_SET_S(cpu_,setsize_,set_) \
-    do { \
-        size_t __cpu = (cpu_); \
-        if (__cpu < 8*(setsize_)) \
-            (set_)->__bits[__CPU_ELT(__cpu)] |= __CPU_MASK(__cpu); \
-    } while (0)
+#define CPU_SET_S(cpu, setsize, set) \
+  do { \
+    size_t __cpu = (cpu); \
+    if (__cpu < 8 * (setsize)) \
+      (set)->__bits[__CPU_ELT(__cpu)] |= __CPU_MASK(__cpu); \
+  } while (0)
 
-#define CPU_CLR_S(cpu_,setsize_,set_) \
-    do { \
-        size_t __cpu = (cpu_); \
-        if (__cpu < 8*(setsize_)) \
-            (set_)->__bits[__CPU_ELT(__cpu)] &= ~__CPU_MASK(__cpu); \
-    } while (0)
+#define CPU_CLR_S(cpu, setsize, set) \
+  do { \
+    size_t __cpu = (cpu); \
+    if (__cpu < 8 * (setsize)) \
+      (set)->__bits[__CPU_ELT(__cpu)] &= ~__CPU_MASK(__cpu); \
+  } while (0)
 
-#define CPU_ISSET_S(cpu_, setsize_, set_) \
-    (__extension__ ({ \
-        size_t __cpu = (cpu_); \
-        (__cpu < 8*(setsize_)) \
-          ? ((set_)->__bits[__CPU_ELT(__cpu)] & __CPU_MASK(__cpu)) != 0 \
-          : 0; \
-    }))
+#define CPU_ISSET_S(cpu, setsize, set) \
+  (__extension__ ({ \
+    size_t __cpu = (cpu); \
+    (__cpu < 8 * (setsize)) \
+      ? ((set)->__bits[__CPU_ELT(__cpu)] & __CPU_MASK(__cpu)) != 0 \
+      : 0; \
+  }))
 
-#define CPU_EQUAL_S(setsize_, set1_, set2_) \
-    (__extension__ ({ \
-        __const __CPU_BITTYPE* __src1 = (set1_)->__bits; \
-        __const __CPU_BITTYPE* __src2 = (set2_)->__bits; \
-        size_t __nn = 0, __nn_max = (setsize_)/sizeof(__CPU_BITTYPE); \
-        for (; __nn < __nn_max; __nn++) { \
-            if (__src1[__nn] != __src2[__nn]) \
-                break; \
-        } \
-        __nn == __nn_max; \
-    }))
+#define CPU_EQUAL_S(setsize, set1, set2)  (__builtin_memcmp(set1, set2, setsize) == 0)
 
-#define __CPU_OP_S(setsize_, dstset_, srcset1_, srcset2_, op) \
-    do { \
-        cpu_set_t* __dst = (dstset); \
-        const __CPU_BITTYPE* __src1 = (srcset1)->__bits; \
-        const __CPU_BITTYPE* __src2 = (srcset2)->__bits; \
-        size_t __nn = 0, __nn_max = (setsize_)/sizeof(__CPU_BITTYPE); \
-        for (; __nn < __nn_max; __nn++) \
-            (__dst)->__bits[__nn] = __src1[__nn] op __src2[__nn]; \
-    } while (0)
+#define CPU_AND_S(setsize, dst, set1, set2)  __CPU_OP_S(setsize, dst, set1, set2, &)
+#define CPU_OR_S(setsize, dst, set1, set2)   __CPU_OP_S(setsize, dst, set1, set2, |)
+#define CPU_XOR_S(setsize, dst, set1, set2)  __CPU_OP_S(setsize, dst, set1, set2, ^)
 
-#define CPU_COUNT_S(setsize_, set_) \
-    __sched_cpucount((setsize_), (set_))
+#define __CPU_OP_S(setsize, dstset, srcset1, srcset2, op) \
+  do { \
+    cpu_set_t* __dst = (dstset); \
+    const __CPU_BITTYPE* __src1 = (srcset1)->__bits; \
+    const __CPU_BITTYPE* __src2 = (srcset2)->__bits; \
+    size_t __nn = 0, __nn_max = (setsize)/sizeof(__CPU_BITTYPE); \
+    for (; __nn < __nn_max; __nn++) \
+      (__dst)->__bits[__nn] = __src1[__nn] op __src2[__nn]; \
+  } while (0)
+
+#define CPU_COUNT_S(setsize, set)  __sched_cpucount((setsize), (set))
 
 extern int __sched_cpucount(size_t setsize, cpu_set_t* set);
 
diff --git a/tests/sched_test.cpp b/tests/sched_test.cpp
index 49f1642..2578289 100644
--- a/tests/sched_test.cpp
+++ b/tests/sched_test.cpp
@@ -50,3 +50,207 @@
   GTEST_LOG_(INFO) << "This test does nothing.\n";
 }
 #endif
+
+TEST(sched, cpu_set) {
+  cpu_set_t set;
+
+  CPU_ZERO(&set);
+  CPU_SET(0, &set);
+  CPU_SET(17, &set);
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_EQ(i == 0 || i == 17, CPU_ISSET(i, &set));
+  }
+
+  // We should fail silently if we try to set/test outside the range.
+  CPU_SET(CPU_SETSIZE, &set);
+  ASSERT_FALSE(CPU_ISSET(CPU_SETSIZE, &set));
+}
+
+TEST(sched, cpu_count) {
+  cpu_set_t set;
+
+  CPU_ZERO(&set);
+  ASSERT_EQ(0, CPU_COUNT(&set));
+  CPU_SET(2, &set);
+  CPU_SET(10, &set);
+  ASSERT_EQ(2, CPU_COUNT(&set));
+  CPU_CLR(10, &set);
+  ASSERT_EQ(1, CPU_COUNT(&set));
+}
+
+TEST(sched, cpu_zero) {
+  cpu_set_t set;
+
+  CPU_ZERO(&set);
+  ASSERT_EQ(0, CPU_COUNT(&set));
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_FALSE(CPU_ISSET(i, &set));
+  }
+}
+
+TEST(sched, cpu_clr) {
+  cpu_set_t set;
+
+  CPU_ZERO(&set);
+  CPU_SET(0, &set);
+  CPU_SET(1, &set);
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_EQ(i == 0 || i == 1, CPU_ISSET(i, &set));
+  }
+  CPU_CLR(1, &set);
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_EQ(i == 0, CPU_ISSET(i, &set));
+  }
+
+  // We should fail silently if we try to clear/test outside the range.
+  CPU_CLR(CPU_SETSIZE, &set);
+  ASSERT_FALSE(CPU_ISSET(CPU_SETSIZE, &set));
+}
+
+TEST(sched, cpu_equal) {
+  cpu_set_t set1;
+  cpu_set_t set2;
+
+  CPU_ZERO(&set1);
+  CPU_ZERO(&set2);
+  CPU_SET(1, &set1);
+  ASSERT_FALSE(CPU_EQUAL(&set1, &set2));
+  CPU_SET(1, &set2);
+  ASSERT_TRUE(CPU_EQUAL(&set1, &set2));
+}
+
+TEST(sched, cpu_op) {
+  cpu_set_t set1;
+  cpu_set_t set2;
+  cpu_set_t set3;
+
+  CPU_ZERO(&set1);
+  CPU_ZERO(&set2);
+  CPU_ZERO(&set3);
+  CPU_SET(0, &set1);
+  CPU_SET(0, &set2);
+  CPU_SET(1, &set2);
+
+  CPU_AND(&set3, &set1, &set2);
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_EQ(i == 0, CPU_ISSET(i, &set3));
+  }
+
+  CPU_XOR(&set3, &set1, &set2);
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_EQ(i == 1, CPU_ISSET(i, &set3));
+  }
+
+  CPU_OR(&set3, &set1, &set2);
+  for (int i = 0; i < CPU_SETSIZE; i++) {
+    ASSERT_EQ(i == 0 || i == 1, CPU_ISSET(i, &set3));
+  }
+}
+
+
+TEST(sched, cpu_alloc_small) {
+  cpu_set_t* set = CPU_ALLOC(17);
+  size_t size = CPU_ALLOC_SIZE(17);
+
+  CPU_ZERO_S(size, set);
+  ASSERT_EQ(0, CPU_COUNT_S(size, set));
+  CPU_SET_S(16, size, set);
+  ASSERT_TRUE(CPU_ISSET_S(16, size, set));
+
+  CPU_FREE(set);
+}
+
+TEST(sched, cpu_alloc_big) {
+  cpu_set_t* set = CPU_ALLOC(10 * CPU_SETSIZE);
+  size_t size = CPU_ALLOC_SIZE(10 * CPU_SETSIZE);
+
+  CPU_ZERO_S(size, set);
+  ASSERT_EQ(0, CPU_COUNT_S(size, set));
+  CPU_SET_S(CPU_SETSIZE, size, set);
+  ASSERT_TRUE(CPU_ISSET_S(CPU_SETSIZE, size, set));
+
+  CPU_FREE(set);
+}
+
+TEST(sched, cpu_s_macros) {
+  int set_size = 64;
+  size_t size = CPU_ALLOC_SIZE(set_size);
+  cpu_set_t* set = CPU_ALLOC(set_size);
+
+  CPU_ZERO_S(size, set);
+  for (int i = 0; i < set_size; i++) {
+    ASSERT_FALSE(CPU_ISSET_S(i, size, set));
+    CPU_SET_S(i, size, set);
+    ASSERT_TRUE(CPU_ISSET_S(i, size, set));
+    ASSERT_EQ(i + 1, CPU_COUNT_S(size, set));
+  }
+
+  for (int i = 0; i < set_size; i++) {
+    CPU_CLR_S(i, size, set);
+    ASSERT_FALSE(CPU_ISSET_S(i, size, set));
+    ASSERT_EQ(set_size - i - 1, CPU_COUNT_S(size, set));
+  }
+
+  CPU_FREE(set);
+}
+
+TEST(sched, cpu_op_s_macros) {
+  int set_size1 = 64;
+  int set_size2 = set_size1 * 2;
+  int set_size3 = set_size1 * 3;
+  size_t size1 = CPU_ALLOC_SIZE(set_size1);
+  size_t size2 = CPU_ALLOC_SIZE(set_size2);
+  size_t size3 = CPU_ALLOC_SIZE(set_size3);
+
+  cpu_set_t* set1 = CPU_ALLOC(set_size1);
+  cpu_set_t* set2 = CPU_ALLOC(set_size2);
+  cpu_set_t* set3 = CPU_ALLOC(set_size3);
+  CPU_ZERO_S(size1, set1);
+  CPU_ZERO_S(size2, set2);
+  CPU_ZERO_S(size3, set3);
+
+  CPU_SET_S(0, size1, set1);
+  CPU_SET_S(0, size2, set2);
+  CPU_SET_S(1, size2, set2);
+
+  CPU_AND_S(size1, set3, set1, set2);
+  for (int i = 0; i < set_size3; i++) {
+    ASSERT_EQ(i == 0, CPU_ISSET_S(i, size3, set3));
+  }
+
+  CPU_OR_S(size1, set3, set1, set2);
+  for (int i = 0; i < set_size3; i++) {
+    ASSERT_EQ(i == 0 || i == 1, CPU_ISSET_S(i, size3, set3));
+  }
+
+  CPU_XOR_S(size1, set3, set1, set2);
+  for (int i = 0; i < set_size3; i++) {
+    ASSERT_EQ(i == 1, CPU_ISSET_S(i, size3, set3));
+  }
+
+  CPU_FREE(set1);
+  CPU_FREE(set2);
+  CPU_FREE(set3);
+}
+
+TEST(sched, cpu_equal_s) {
+  int set_size1 = 64;
+  int set_size2 = set_size1 * 2;
+  size_t size1 = CPU_ALLOC_SIZE(set_size1);
+  size_t size2 = CPU_ALLOC_SIZE(set_size2);
+
+  cpu_set_t* set1 = CPU_ALLOC(set_size1);
+  cpu_set_t* set2 = CPU_ALLOC(set_size2);
+
+  CPU_ZERO_S(size1, set1);
+  CPU_ZERO_S(size2, set2);
+
+  CPU_SET_S(0, size1, set1);
+  ASSERT_TRUE(CPU_EQUAL_S(size1, set1, set1));
+  ASSERT_FALSE(CPU_EQUAL_S(size1, set1, set2));
+  CPU_SET_S(0, size2, set2);
+  ASSERT_TRUE(CPU_EQUAL_S(size1, set1, set2));
+
+  CPU_FREE(set1);
+  CPU_FREE(set2);
+}