revert 4478



git-svn-id: http://skia.googlecode.com/svn/trunk@4479 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/bench/ChecksumBench.cpp b/bench/ChecksumBench.cpp
index fe3fd47..903e584 100644
--- a/bench/ChecksumBench.cpp
+++ b/bench/ChecksumBench.cpp
@@ -7,41 +7,97 @@
 #include "SkBenchmark.h"
 #include "SkCanvas.h"
 #include "SkChecksum.h"
-#include "SkRandom.h"
+#include "SkString.h"
 
 class ComputeChecksumBench : public SkBenchmark {
-    enum {
-        U32COUNT  = 256,
-        SIZE      = U32COUNT * 4,
-        N         = SkBENCHLOOP(100000),
-    };
-    uint32_t    fData[U32COUNT];
-
 public:
-    ComputeChecksumBench(void* param) : INHERITED(param) {
-        SkRandom rand;
-        for (int i = 0; i < U32COUNT; ++i) {
-            fData[i] = rand.nextU();
-        }
+    ComputeChecksumBench(void* param, const char name[]) : INHERITED(param) {
+        fName.printf("compute_checksum_%s", name);
     }
 
+    enum {
+        DATA_SIZE = 1024,
+        N         = SkBENCHLOOP(100000),
+    };
 protected:
     virtual const char* onGetName() {
-        return "compute_checksum";
+        return fName.c_str();
     }
 
     virtual void onDraw(SkCanvas* canvas) {
-        for (int i = 0; i < N; i++) {
-            volatile uint32_t result = SkChecksum::Compute(fData, sizeof(fData));
-        }
+        uint64_t data[DATA_SIZE / sizeof(uint64_t)];
+        computeChecksum(data, DATA_SIZE);
     }
 
+    virtual void computeChecksum(const uint64_t*, size_t) = 0;
+
+    SkString fName;
 private:
     typedef SkBenchmark INHERITED;
 };
 
+/*
+ *  Use SkComputeChecksum32 to compute a checksum on a datablock
+ */
+class ComputeChecksum32Bench : public ComputeChecksumBench {
+public:
+    ComputeChecksum32Bench(void* param)
+        : INHERITED(param, "32") { }
+
+protected:
+    virtual void computeChecksum(const uint64_t* data, size_t len) {
+        for (int i = 0; i < N; i++) {
+            volatile uint32_t result = SkComputeChecksum32(reinterpret_cast<const uint32_t*>(data), len);
+        }
+    }
+
+private:
+    typedef ComputeChecksumBench INHERITED;
+};
+
+/*
+ *  Use SkComputeChecksum64 to compute a checksum on a datablock
+ */
+class ComputeChecksum64Bench : public ComputeChecksumBench {
+public:
+    ComputeChecksum64Bench(void* param)
+    : INHERITED(param, "64") { }
+    
+protected:
+    virtual void computeChecksum(const uint64_t* data, size_t len) {
+        for (int i = 0; i < N; i++) {
+            volatile uint64_t result = SkComputeChecksum64(data, len);
+        }
+    }
+    
+private:
+    typedef ComputeChecksumBench INHERITED;
+};
+
+/*
+ *  Use SkChecksum::Compute to compute a checksum on a datablock
+ */
+class ComputeChecksumXXBench : public ComputeChecksumBench {
+public:
+    ComputeChecksumXXBench(void* param) : INHERITED(param, "XX") { }
+    
+protected:
+    virtual void computeChecksum(const uint64_t* data, size_t len) {
+        for (int i = 0; i < N; i++) {
+            volatile uint32_t result = SkChecksum::Compute(reinterpret_cast<const uint32_t*>(data), len);
+        }
+    }
+    
+private:
+    typedef ComputeChecksumBench INHERITED;
+};
+
 ///////////////////////////////////////////////////////////////////////////////
 
-static SkBenchmark* Fact0(void* p) { return new ComputeChecksumBench(p); }
+static SkBenchmark* Fact0(void* p) { return new ComputeChecksum32Bench(p); }
+static SkBenchmark* Fact1(void* p) { return new ComputeChecksum64Bench(p); }
+static SkBenchmark* Fact2(void* p) { return new ComputeChecksumXXBench(p); }
 
 static BenchRegistry gReg0(Fact0);
+static BenchRegistry gReg1(Fact1);
+static BenchRegistry gReg2(Fact2);
diff --git a/include/core/SkChecksum.h b/include/core/SkChecksum.h
index e66df54..e767670 100644
--- a/include/core/SkChecksum.h
+++ b/include/core/SkChecksum.h
@@ -10,6 +10,64 @@
 
 #include "SkTypes.h"
 
+#if !defined(SK_PREFER_32BIT_CHECKSUM)
+#define SK_PREFER_32BIT_CHECKSUM 0
+#endif
+
+enum {
+    ChecksumRotateBits = 17
+};
+
+#define SkCHECKSUM_MASH(CHECKSUM, NEW_CHUNK) \
+    CHECKSUM = (((CHECKSUM) >> (sizeof(CHECKSUM)*8 - ChecksumRotateBits)) + \
+        ((CHECKSUM) << ChecksumRotateBits)) ^ (NEW_CHUNK);
+
+
+/**
+ *  Compute a 64-bit checksum for a given data block
+ *
+ *  @param ptr Memory address of the data block to be processed. Must be
+ *      32-bit aligned
+ *  @param size Size of the data block in bytes. Must be a multiple of 8.
+ *  @return checksum result
+ */
+inline uint64_t SkComputeChecksum64(const uint64_t* ptr, size_t size) {
+    SkASSERT(SkIsAlign8(size));
+    // Strict 8-byte alignment is not required on ptr. On current
+    // CPUs there is no measurable performance difference between 32-bit
+    // and 64-bit aligned access to uint64_t data
+    SkASSERT(SkIsAlign4((intptr_t)ptr));
+
+    const uint64_t* stop = ptr + (size >> 3);
+    uint64_t result = 0;
+    while (ptr < stop) {
+        SkCHECKSUM_MASH(result, *ptr);
+        ptr++;
+    }
+    return result;
+}
+
+/**
+ *  Compute a 32-bit checksum for a given data block
+ *
+ *  @param ptr Memory address of the data block to be processed. Must be
+ *      32-bit aligned.
+ *  @param size Size of the data block in bytes. Must be a multiple of 4.
+ *  @return checksum result
+ */
+inline uint32_t SkComputeChecksum32(const uint32_t* ptr, size_t size) {
+    SkASSERT(SkIsAlign4(size));
+    SkASSERT(SkIsAlign4((intptr_t)ptr));
+
+    const uint32_t* stop = ptr + (size >> 2);
+    uint32_t result = 0;
+    while (ptr < stop) {
+        SkCHECKSUM_MASH(result, *ptr);
+        ptr++;
+    }
+    return result;
+}
+
 class SkChecksum : SkNoncopyable {
 private:
     /*
diff --git a/include/core/SkDescriptor.h b/include/core/SkDescriptor.h
index 8675fa1..00bc9aa 100644
--- a/include/core/SkDescriptor.h
+++ b/include/core/SkDescriptor.h
@@ -15,29 +15,34 @@
 
 class SkDescriptor : SkNoncopyable {
 public:
-    static size_t ComputeOverhead(int entryCount) {
+    static size_t ComputeOverhead(int entryCount)
+    {
         SkASSERT(entryCount >= 0);
         return sizeof(SkDescriptor) + entryCount * sizeof(Entry);
     }
 
-    static SkDescriptor* Alloc(size_t length) {
+    static SkDescriptor* Alloc(size_t length)
+    {
         SkASSERT(SkAlign4(length) == length);
         SkDescriptor* desc = (SkDescriptor*)sk_malloc_throw(length);
         return desc;
     }
 
-    static void Free(SkDescriptor* desc) {
+    static void Free(SkDescriptor* desc)
+    {
         sk_free(desc);
     }
 
-    void init() {
+    void init()
+    {
         fLength = sizeof(SkDescriptor);
         fCount  = 0;
     }
 
     uint32_t getLength() const { return fLength; }
 
-    void* addEntry(uint32_t tag, uint32_t length, const void* data = NULL) {
+    void* addEntry(uint32_t tag, uint32_t length, const void* data = NULL)
+    {
         SkASSERT(tag);
         SkASSERT(SkAlign4(length) == length);
         SkASSERT(this->findEntry(tag, NULL) == NULL);
@@ -45,34 +50,37 @@
         Entry*  entry = (Entry*)((char*)this + fLength);
         entry->fTag = tag;
         entry->fLen = length;
-        if (data) {
+        if (data)
             memcpy(entry + 1, data, length);
-        }
 
         fCount += 1;
         fLength += sizeof(Entry) + length;
         return (entry + 1); // return its data
     }
 
-    void computeChecksum() {
+    void computeChecksum()
+    {
         fChecksum = SkDescriptor::ComputeChecksum(this);
     }
 
 #ifdef SK_DEBUG
-    void assertChecksum() const {
-        SkASSERT(SkDescriptor::ComputeChecksum(this) == fChecksum);
+    void assertChecksum() const
+    {
+        SkASSERT(fChecksum == SkDescriptor::ComputeChecksum(this));
     }
 #endif
 
-    const void* findEntry(uint32_t tag, uint32_t* length) const {
+    const void* findEntry(uint32_t tag, uint32_t* length) const
+    {
         const Entry* entry = (const Entry*)(this + 1);
         int          count = fCount;
 
-        while (--count >= 0) {
-            if (entry->fTag == tag) {
-                if (length) {
+        while (--count >= 0)
+        {
+            if (entry->fTag == tag)
+            {
+                if (length)
                     *length = entry->fLen;
-                }
                 return entry + 1;
             }
             entry = (const Entry*)((const char*)(entry + 1) + entry->fLen);
@@ -80,13 +88,15 @@
         return NULL;
     }
 
-    SkDescriptor* copy() const {
+    SkDescriptor* copy() const
+    {
         SkDescriptor* desc = SkDescriptor::Alloc(fLength);
         memcpy(desc, this, fLength);
         return desc;
     }
 
-    bool equals(const SkDescriptor& other) const {
+    bool equals(const SkDescriptor& other) const
+    {
         // probe to see if we have a good checksum algo
 //        SkASSERT(a.fChecksum != b.fChecksum || memcmp(&a, &b, a.fLength) == 0);
 
@@ -120,10 +130,11 @@
     uint32_t fLength;    // must be second
     uint32_t fCount;
 
-    static uint32_t ComputeChecksum(const SkDescriptor* desc) {
+    static uint32_t ComputeChecksum(const SkDescriptor* desc)
+    {
         const uint32_t* ptr = (const uint32_t*)desc + 1; // skip the checksum field
-        size_t len = desc->fLength - sizeof(uint32_t);
-        return SkChecksum::Compute(ptr, len);
+        const size_t len = desc->fLength-sizeof(uint32_t);
+        return SkComputeChecksum32(ptr, len);
     }
     
     // private so no one can create one except our factories
@@ -134,20 +145,18 @@
 
 class SkAutoDescriptor : SkNoncopyable {
 public:
-    SkAutoDescriptor(size_t size) {
-        if (size <= sizeof(fStorage)) {
+    SkAutoDescriptor(size_t size)
+    {
+        if (size <= sizeof(fStorage))
             fDesc = (SkDescriptor*)(void*)fStorage;
-        } else {
+        else
             fDesc = SkDescriptor::Alloc(size);
-        }
     }
-
-    ~SkAutoDescriptor() {
-        if (fDesc != (SkDescriptor*)(void*)fStorage) {
+    ~SkAutoDescriptor()
+    {
+        if (fDesc != (SkDescriptor*)(void*)fStorage)
             SkDescriptor::Free(fDesc);
-        }
     }
-
     SkDescriptor* getDesc() const { return fDesc; }
 private:
     enum {
diff --git a/src/core/SkPictureFlat.cpp b/src/core/SkPictureFlat.cpp
index 2f8d1e1..ec04495 100644
--- a/src/core/SkPictureFlat.cpp
+++ b/src/core/SkPictureFlat.cpp
@@ -80,6 +80,12 @@
     flattenProc(buffer, obj);
     uint32_t size = buffer.size();
 
+
+#if !SK_PREFER_32BIT_CHECKSUM
+    uint32_t unpaddedSize = size;
+    size = SkAlign8(size);
+#endif
+
     // allocate enough memory to hold both SkFlatData and the serialized
     // contents
     SkFlatData* result = (SkFlatData*) heap->allocThrow(size + sizeof(SkFlatData));
@@ -88,7 +94,18 @@
 
     // put the serialized contents into the data section of the new allocation
     buffer.flatten(result->data());
-    result->fChecksum = SkChecksum::Compute(result->data32(), size);
+#if SK_PREFER_32BIT_CHECKSUM
+    result->fChecksum =
+        SkComputeChecksum32(reinterpret_cast<uint32_t*>(result->data()), size);
+#else
+    if (size != unpaddedSize) {
+        // Flat data is padded: put zeros in the last 32 bits.
+        SkASSERT(size - 4 == unpaddedSize);
+        *((uint32_t*)((char*)result->data() + unpaddedSize)) = 0;
+    }
+    result->fChecksum =
+        SkComputeChecksum64(reinterpret_cast<uint64_t*>(result->data()), size);
+#endif
     return result;
 }
 
diff --git a/src/core/SkPictureFlat.h b/src/core/SkPictureFlat.h
index 7888b2e..5918261 100644
--- a/src/core/SkPictureFlat.h
+++ b/src/core/SkPictureFlat.h
@@ -156,11 +156,16 @@
 
     static int Compare(const SkFlatData* a, const SkFlatData* b) {
         size_t bytesToCompare = sizeof(a->fChecksum) + a->fAllocSize;
+#if SK_PREFER_32BIT_CHECKSUM
+        typedef uint32_t CompareType;
         SkASSERT(SkIsAlign4(bytesToCompare));
-
-        const uint32_t* a_ptr = &(a->fChecksum);
-        const uint32_t* b_ptr = &(b->fChecksum);
-        const uint32_t* stop = a_ptr + bytesToCompare / sizeof(uint32_t);
+#else
+        typedef uint64_t CompareType;
+        SkASSERT(SkIsAlign8(bytesToCompare));
+#endif
+        const CompareType* a_ptr = &(a->fChecksum);
+        const CompareType* b_ptr = &(b->fChecksum);
+        const CompareType* stop = a_ptr + bytesToCompare / sizeof(CompareType);
         while(a_ptr < stop) {
             if (*a_ptr != *b_ptr) {
                 return (*a_ptr < *b_ptr) ? -1 : 1;
@@ -173,8 +178,6 @@
     
     int index() const { return fIndex; }
     void* data() const { return (char*)this + sizeof(*this); }
-    // We guarantee that our data is 32bit aligned
-    uint32_t* data32() const { return (uint32_t*)this->data(); }
     
 #ifdef SK_DEBUG_SIZE
     size_t size() const { return sizeof(SkFlatData) + fAllocSize; }
@@ -196,7 +199,11 @@
     int fIndex;
     int32_t fAllocSize;
     // fChecksum must be defined last in order to be contiguous with data()
+#if SK_PREFER_32BIT_CHECKSUM
     uint32_t fChecksum;
+#else
+    uint64_t fChecksum;
+#endif
 };
 
 template <class T>