Fix SkBitSet.

Rename methods to be more like std::bitset and boost::dynamic_bitset.
Also fix findFirst to actually find the first and add tests.

Change-Id: Ic812c0a6d0ce1740c1cda900516f609ebbf5811c
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/287676
Commit-Queue: Ben Wagner <bungeman@google.com>
Reviewed-by: Jim Van Verth <jvanverth@google.com>
diff --git a/bench/MathBench.cpp b/bench/MathBench.cpp
index e47c6ac..49dc177 100644
--- a/bench/MathBench.cpp
+++ b/bench/MathBench.cpp
@@ -441,6 +441,66 @@
     typedef Benchmark INHERITED;
 };
 
+class CTZBench : public Benchmark {
+    enum {
+        ARRAY = 1000,
+    };
+    uint32_t fData[ARRAY];
+    bool fUsePortable;
+
+public:
+    CTZBench(bool usePortable) : fUsePortable(usePortable) {
+
+        SkRandom rand;
+        for (int i = 0; i < ARRAY; ++i) {
+            fData[i] = rand.nextU();
+        }
+
+        if (fUsePortable) {
+            fName = "ctz_portable";
+        } else {
+            fName = "ctz_intrinsic";
+        }
+    }
+
+    bool isSuitableFor(Backend backend) override {
+        return backend == kNonRendering_Backend;
+    }
+
+    // just so the compiler doesn't remove our loops
+    virtual void process(int) {}
+
+protected:
+    void onDraw(int loops, SkCanvas*) override {
+        int accum = 0;
+
+        if (fUsePortable) {
+            for (int j = 0; j < loops; ++j) {
+                for (int i = 0; i < ARRAY; ++i) {
+                    accum += SkCTZ_portable(fData[i]);
+                }
+                this->process(accum);
+            }
+        } else {
+            for (int j = 0; j < loops; ++j) {
+                for (int i = 0; i < ARRAY; ++i) {
+                    accum += SkCTZ(fData[i]);
+                }
+                this->process(accum);
+            }
+        }
+    }
+
+    const char* onGetName() override {
+        return fName;
+    }
+
+private:
+    const char* fName;
+
+    typedef Benchmark INHERITED;
+};
+
 ///////////////////////////////////////////////////////////////////////////////
 
 class NormalizeBench : public Benchmark {
@@ -594,6 +654,8 @@
 
 DEF_BENCH( return new CLZBench(false); )
 DEF_BENCH( return new CLZBench(true); )
+DEF_BENCH( return new CTZBench(false); )
+DEF_BENCH( return new CTZBench(true); )
 
 DEF_BENCH( return new NormalizeBench(); )
 
diff --git a/src/core/SkMath.cpp b/src/core/SkMath.cpp
index 8b2df0e..462a6e4 100644
--- a/src/core/SkMath.cpp
+++ b/src/core/SkMath.cpp
@@ -25,22 +25,51 @@
     if (x & 0xFFFF0000) {
         sub_shift(zeros, x, 16);
     }
-    if (x & 0xFF00) {
+    if (x & 0x0000FF00) {
         sub_shift(zeros, x, 8);
     }
-    if (x & 0xF0) {
+    if (x & 0x000000F0) {
         sub_shift(zeros, x, 4);
     }
-    if (x & 0xC) {
+    if (x & 0x0000000C) {
         sub_shift(zeros, x, 2);
     }
-    if (x & 0x2) {
+    if (x & 0x00000002) {
         sub_shift(zeros, x, 1);
     }
 
     return zeros;
 }
 
+#define add_shift(zeros, x, n)  \
+    zeros += n;                 \
+    x >>= n
+
+int SkCTZ_portable(uint32_t x) {
+    if (x == 0) {
+        return 32;
+    }
+
+    int zeros = 0;
+    if (!(x & 0x0000FFFF)) {
+        add_shift(zeros, x, 16);
+    }
+    if (!(x & 0x000000FF)) {
+        add_shift(zeros, x, 8);
+    }
+    if (!(x & 0x0000000F)) {
+        add_shift(zeros, x, 4);
+    }
+    if (!(x & 0x00000003)) {
+        add_shift(zeros, x, 2);
+    }
+    if (!(x & 0x00000001)) {
+        add_shift(zeros, x, 1);
+    }
+
+    return zeros;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 /* www.worldserver.com/turk/computergraphics/FixedSqrt.pdf
diff --git a/src/core/SkMathPriv.h b/src/core/SkMathPriv.h
index 23b51bd..78a9b6f 100644
--- a/src/core/SkMathPriv.h
+++ b/src/core/SkMathPriv.h
@@ -144,7 +144,7 @@
                 _BitScanReverse(&index, mask);
                 // Suppress this bogus /analyze warning. The check for non-zero
                 // guarantees that _BitScanReverse will succeed.
-#pragma warning(suppress : 6102) // Using 'index' from failed function call
+                #pragma warning(suppress : 6102) // Using 'index' from failed function call
                 return index ^ 0x1F;
             } else {
                 return 32;
@@ -156,10 +156,43 @@
             return mask ? __builtin_clz(mask) : 32;
         }
     #else
-        #define SkCLZ(x)    SkCLZ_portable(x)
+        static inline int SkCLZ(uint32_t mask) {
+            return SkCLZ_portable(mask);
+        }
     #endif
 #endif
 
+//! Returns the number of trailing zero bits (0...32)
+int SkCTZ_portable(uint32_t);
+
+#ifndef SkCTZ
+    #if defined(SK_BUILD_FOR_WIN)
+    #include <intrin.h>
+
+    static inline int SkCTZ(uint32_t mask) {
+        if (mask) {
+            unsigned long index;
+            _BitScanForward(&index, mask);
+            // Suppress this bogus /analyze warning. The check for non-zero
+            // guarantees that _BitScanReverse will succeed.
+            #pragma warning(suppress : 6102) // Using 'index' from failed function call
+            return index;
+        } else {
+            return 32;
+        }
+    }
+#elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__)
+    static inline int SkCTZ(uint32_t mask) {
+        // __builtin_ctz(0) is undefined, so we have to detect that case.
+        return mask ? __builtin_ctz(mask) : 32;
+    }
+#else
+    static inline int SkCTZ(uint32_t mask) {
+        return SkCTZ_portable(mask);
+    }
+#endif
+#endif
+
 /**
  *  Returns the smallest power-of-2 that is >= the specified value. If value
  *  is already a power of 2, then it is returned unchanged. It is undefined
diff --git a/src/gpu/d3d/GrD3DDescriptorHeap.cpp b/src/gpu/d3d/GrD3DDescriptorHeap.cpp
index 81c1ae6..8aa120f 100644
--- a/src/gpu/d3d/GrD3DDescriptorHeap.cpp
+++ b/src/gpu/d3d/GrD3DDescriptorHeap.cpp
@@ -40,20 +40,20 @@
     // valid only for non-shader-visible heaps
     SkASSERT(!SkToBool(fHeap->GetDesc().Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE));
     D3D12_CPU_DESCRIPTOR_HANDLE handle = fHeap->GetCPUDescriptorHandleForHeapStart();
-    int freeIndex = fFreeBlocks.leadingBitIndex();
-    SkASSERT(freeIndex >= 0);
-    handle.ptr += freeIndex * fHandleIncrementSize;
-    fFreeBlocks.clear(freeIndex);
+    SkBitSet::OptionalIndex freeBlock = fFreeBlocks.findFirst();
+    SkASSERT(freeBlock);
+    handle.ptr += *freeBlock * fHandleIncrementSize;
+    fFreeBlocks.reset(*freeBlock);
 
     return handle;
 }
 
 D3D12_GPU_DESCRIPTOR_HANDLE GrD3DDescriptorHeap::allocateGPUHandle() {
     D3D12_GPU_DESCRIPTOR_HANDLE handle = fHeap->GetGPUDescriptorHandleForHeapStart();
-    int freeIndex = fFreeBlocks.leadingBitIndex();
-    SkASSERT(freeIndex >= 0);
-    handle.ptr += freeIndex * fHandleIncrementSize;
-    fFreeBlocks.clear(freeIndex);
+    SkBitSet::OptionalIndex freeBlock = fFreeBlocks.findFirst();
+    SkASSERT(freeBlock);
+    handle.ptr += *freeBlock * fHandleIncrementSize;
+    fFreeBlocks.reset(*freeBlock);
 
     return handle;
 }
diff --git a/src/pdf/SkPDFGlyphUse.h b/src/pdf/SkPDFGlyphUse.h
index e4afa72..fa7627a 100644
--- a/src/pdf/SkPDFGlyphUse.h
+++ b/src/pdf/SkPDFGlyphUse.h
@@ -10,7 +10,7 @@
 public:
     SkPDFGlyphUse() : fBitSet(0) {}
     SkPDFGlyphUse(SkGlyphID firstNonZero, SkGlyphID lastGlyph)
-        : fBitSet((int)lastGlyph - firstNonZero + 2)
+        : fBitSet(lastGlyph - firstNonZero + 2)
         , fFirstNonZero(firstNonZero)
         , fLastGlyph(lastGlyph) { SkASSERT(firstNonZero >= 1); }
     ~SkPDFGlyphUse() = default;
@@ -20,15 +20,15 @@
     SkGlyphID firstNonZero() const { return fFirstNonZero; }
     SkGlyphID lastGlyph() const { return fLastGlyph; }
     void set(SkGlyphID gid) { fBitSet.set(this->toCode(gid)); }
-    bool has(SkGlyphID gid) const { return fBitSet.has(this->toCode(gid)); }
+    bool has(SkGlyphID gid) const { return fBitSet.test(this->toCode(gid)); }
 
     template<typename FN>
     void getSetValues(FN f) const {
         if (fFirstNonZero == 1) {
-            return fBitSet.getSetValues(std::move(f));
+            return fBitSet.forEachSetIndex(std::move(f));
         }
         uint16_t offset = fFirstNonZero - 1;
-        fBitSet.getSetValues([&f, offset](unsigned v) { f(v == 0 ? v : v + offset); });
+        fBitSet.forEachSetIndex([&f, offset](unsigned v) { f(v == 0 ? v : v + offset); });
     }
 
 private:
diff --git a/src/utils/SkBitSet.h b/src/utils/SkBitSet.h
index b2bb6ef..2224164 100644
--- a/src/utils/SkBitSet.h
+++ b/src/utils/SkBitSet.h
@@ -43,22 +43,23 @@
     }
 
     /** Set the value of the index-th bit to false.  */
-    void clear(size_t index) {
+    void reset(size_t index) {
         SkASSERT(index < fSize);
         *this->chunkFor(index) &= ~chunkMaskFor(index);
     }
 
-    bool has(size_t index) const {
-        return (index < fSize) && SkToBool(*this->chunkFor(index) & chunkMaskFor(index));
+    bool test(size_t index) const {
+        SkASSERT(index < fSize);
+        return SkToBool(*this->chunkFor(index) & chunkMaskFor(index));
     }
 
     size_t size() const {
         return fSize;
     }
 
-    // Calls f(size_t) for each set value.
+    // Calls f(size_t index) for each set index.
     template<typename FN>
-    void getSetValues(FN f) const {
+    void forEachSetIndex(FN f) const {
         const Chunk* chunks = fChunks.get();
         const size_t numChunks = numChunksFor(fSize);
         for (size_t i = 0; i < numChunks; ++i) {
@@ -73,18 +74,49 @@
         }
     }
 
-    // Returns the index of the first set bit
-    // Returns -1 if no bits are set
-    int leadingBitIndex() {
+    // Use std::optional<size_t> when possible.
+    class OptionalIndex {
+        bool fHasValue;
+        size_t fValue;
+    public:
+        OptionalIndex() : fHasValue(false) {}
+        constexpr OptionalIndex(size_t index) : fHasValue(true), fValue(index) {}
+
+        constexpr size_t* operator->() { return &fValue; }
+        constexpr const size_t* operator->() const { return &fValue; }
+        constexpr size_t& operator*() & { return fValue; }
+        constexpr const size_t& operator*() const& { return fValue; }
+        constexpr size_t&& operator*() && { return std::move(fValue); }
+        constexpr const size_t&& operator*() const&& { return std::move(fValue); }
+
+        constexpr explicit operator bool() const noexcept { return fHasValue; }
+        constexpr bool has_value() const noexcept { return fHasValue; }
+
+        constexpr size_t& value() & { return fValue; }
+        constexpr const size_t& value() const & { return fValue; }
+        constexpr size_t&& value() && { return std::move(fValue); }
+        constexpr const size_t&& value() const && { return std::move(fValue); }
+
+        template<typename U> constexpr size_t value_or(U&& defaultValue) const& {
+            return bool(*this) ? **this
+                               : static_cast<size_t>(std::forward<U>(defaultValue));
+        }
+        template<typename U> constexpr size_t value_or(U&& defaultValue) && {
+            return bool(*this) ? std::move(**this)
+                               : static_cast<size_t>(std::forward<U>(defaultValue));
+        }
+    };
+    // If any bits are set returns the index of the first.
+    OptionalIndex findFirst() {
         const Chunk* chunks = fChunks.get();
         const size_t numChunks = numChunksFor(fSize);
         for (size_t i = 0; i < numChunks; ++i) {
             if (Chunk chunk = chunks[i]) {  // There are set bits
-                static_assert(ChunkBits <= std::numeric_limits<uint32_t>::digits, "SkPrevLog2");
-                return i * ChunkBits + SkPrevLog2(chunk);
+                static_assert(ChunkBits <= std::numeric_limits<uint32_t>::digits, "SkCTZ");
+                return OptionalIndex(i * ChunkBits + SkCTZ(chunk));
             }
         }
-        return -1;
+        return OptionalIndex();
     }
 
 private:
diff --git a/src/xps/SkXPSDevice.cpp b/src/xps/SkXPSDevice.cpp
index 53cd5e6..a131d8d 100644
--- a/src/xps/SkXPSDevice.cpp
+++ b/src/xps/SkXPSDevice.cpp
@@ -321,7 +321,7 @@
     //CreateFontPackage wants unsigned short.
     //Microsoft, Y U NO stdint.h?
     std::vector<unsigned short> keepList;
-    current.glyphsUsed.getSetValues([&keepList](unsigned v) {
+    current.glyphsUsed.forEachSetIndex([&keepList](size_t v) {
             keepList.push_back((unsigned short)v);
     });
 
diff --git a/tests/BitSetTest.cpp b/tests/BitSetTest.cpp
index c9105b0..d19b0cf 100644
--- a/tests/BitSetTest.cpp
+++ b/tests/BitSetTest.cpp
@@ -12,21 +12,33 @@
 
 DEF_TEST(BitSet, reporter) {
     SkBitSet set0(65536);
-    REPORTER_ASSERT(reporter, set0.has(0) == false);
-    REPORTER_ASSERT(reporter, set0.has(32767) == false);
-    REPORTER_ASSERT(reporter, set0.has(65535) == false);
+    REPORTER_ASSERT(reporter, set0.size() == 65536);
+    REPORTER_ASSERT(reporter, set0.test(0) == false);
+    REPORTER_ASSERT(reporter, set0.test(32767) == false);
+    REPORTER_ASSERT(reporter, set0.test(65535) == false);
+    REPORTER_ASSERT(reporter, !set0.findFirst());
 
     set0.set(22);
-    REPORTER_ASSERT(reporter, set0.has(22) == true);
+    REPORTER_ASSERT(reporter, set0.test(22) == true);
+    REPORTER_ASSERT(reporter, set0.findFirst());
+    REPORTER_ASSERT(reporter, *set0.findFirst() == 22);
     set0.set(24);
-    REPORTER_ASSERT(reporter, set0.has(24) == true);
+    REPORTER_ASSERT(reporter, set0.test(24) == true);
+    REPORTER_ASSERT(reporter, *set0.findFirst() == 22);
     set0.set(35);  // on a different DWORD
-    REPORTER_ASSERT(reporter, set0.has(35) == true);
-    REPORTER_ASSERT(reporter, set0.has(24) == true);
-    REPORTER_ASSERT(reporter, set0.has(35) == true);
+    REPORTER_ASSERT(reporter, set0.test(35) == true);
+    REPORTER_ASSERT(reporter, *set0.findFirst() == 22);
+    REPORTER_ASSERT(reporter, set0.test(24) == true);
+    REPORTER_ASSERT(reporter, set0.test(35) == true);
+    set0.set(21);
+    REPORTER_ASSERT(reporter, set0.test(21) == true);
+    REPORTER_ASSERT(reporter, *set0.findFirst() == 21);
+    set0.reset(21);
+    REPORTER_ASSERT(reporter, set0.test(21) == false);
+    REPORTER_ASSERT(reporter, *set0.findFirst() == 22);
 
     std::vector<unsigned int> data;
-    set0.getSetValues([&data](unsigned v) { data.push_back(v); });
+    set0.forEachSetIndex([&data](unsigned v) { data.push_back(v); });
 
     REPORTER_ASSERT(reporter, data.size() == 3);
     REPORTER_ASSERT(reporter, data[0] == 22);
@@ -35,8 +47,8 @@
 
     SkBitSet set1(65536);
     set1.set(12345);
-    REPORTER_ASSERT(reporter, set0.has(12345) == false);
-    REPORTER_ASSERT(reporter, set1.has(12345) == true);
-    REPORTER_ASSERT(reporter, set1.has(22) == false);
-    REPORTER_ASSERT(reporter, set0.has(35) == true);
+    REPORTER_ASSERT(reporter, set0.test(12345) == false);
+    REPORTER_ASSERT(reporter, set1.test(12345) == true);
+    REPORTER_ASSERT(reporter, set1.test(22) == false);
+    REPORTER_ASSERT(reporter, set0.test(35) == true);
 }
diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp
index 7d7427f..d3164ed 100644
--- a/tests/MathTest.cpp
+++ b/tests/MathTest.cpp
@@ -20,6 +20,7 @@
     REPORTER_ASSERT(reporter, 32 == SkCLZ(0));
     REPORTER_ASSERT(reporter, 31 == SkCLZ(1));
     REPORTER_ASSERT(reporter, 1 == SkCLZ(1 << 30));
+    REPORTER_ASSERT(reporter, 1 == SkCLZ((1 << 30) | (1 << 24) | 1));
     REPORTER_ASSERT(reporter, 0 == SkCLZ(~0U));
 
     SkRandom rand;
@@ -30,7 +31,26 @@
         mask >>= (mask & 31);
         int intri = SkCLZ(mask);
         int porta = SkCLZ_portable(mask);
-        REPORTER_ASSERT(reporter, intri == porta);
+        REPORTER_ASSERT(reporter, intri == porta, "mask:%d intri:%d porta:%d", mask, intri, porta);
+    }
+}
+
+static void test_ctz(skiatest::Reporter* reporter) {
+    REPORTER_ASSERT(reporter, 32 == SkCTZ(0));
+    REPORTER_ASSERT(reporter, 0 == SkCTZ(1));
+    REPORTER_ASSERT(reporter, 30 == SkCTZ(1 << 30));
+    REPORTER_ASSERT(reporter, 2 == SkCTZ((1 << 30) | (1 << 24) | (1 << 2)));
+    REPORTER_ASSERT(reporter, 0 == SkCTZ(~0U));
+
+    SkRandom rand;
+    for (int i = 0; i < 1000; ++i) {
+        uint32_t mask = rand.nextU();
+        // need to get some zeros for testing, but in some obscure way so the
+        // compiler won't "see" that, and work-around calling the functions.
+        mask >>= (mask & 31);
+        int intri = SkCTZ(mask);
+        int porta = SkCTZ_portable(mask);
+        REPORTER_ASSERT(reporter, intri == porta, "mask:%d intri:%d porta:%d", mask, intri, porta);
     }
 }
 
@@ -472,6 +492,7 @@
 
     test_muldivround(reporter);
     test_clz(reporter);
+    test_ctz(reporter);
 }
 
 template <typename T> struct PairRec {