Fix gradients with alpha to convert to premul *after* the intermediate color
has been computed; otherwise we can't distinguish 0x00000000 from 0x00FF0000.
Add fast case for index blit where we read 4 src pixels at a time.



git-svn-id: http://skia.googlecode.com/svn/trunk@248 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/samplecode/SampleGradients.cpp b/samplecode/SampleGradients.cpp
index c80d653..f8fa042 100644
--- a/samplecode/SampleGradients.cpp
+++ b/samplecode/SampleGradients.cpp
@@ -3,6 +3,37 @@
 #include "SkCanvas.h"
 #include "SkGradientShader.h"
 
+static SkShader* setgrad(const SkRect& r, SkColor c0, SkColor c1) {  // builds a linear gradient shader from c0 to c1 for rect r
+    SkColor colors[] = { c0, c1 };
+    SkPoint pts[] = { { r.fLeft, r.fTop }, { r.fRight, r.fTop } };  // end points: r's top-left and top-right corners
+    return SkGradientShader::CreateLinear(pts, colors, NULL, 2,
+                                          SkShader::kClamp_TileMode, NULL);  // returned shader is unref()'d by the callers in this file
+}
+
+static void test_alphagradients(SkCanvas* canvas) {  // draws three stacked gradient strips whose end colors differ only below the top (alpha) byte
+    SkRect r;
+    r.set(SkIntToScalar(10), SkIntToScalar(10),
+          SkIntToScalar(410), SkIntToScalar(30));
+    SkPaint p, p2;  // p draws with the gradient shader; p2 draws the same rect stroked
+    p2.setStyle(SkPaint::kStroke_Style);
+    
+    p.setShader(setgrad(r, 0xFF00FF00, 0x0000FF00))->unref();  // strip 1: end color 0x0000FF00 (assumes SkColor is 0xAARRGGBB - TODO confirm)
+    canvas->drawRect(r, p);
+    canvas->drawRect(r, p2);
+    
+    r.offset(0, r.height() + SkIntToScalar(4));  // move down by one strip height plus 4
+    p.setShader(setgrad(r, 0xFF00FF00, 0x00000000))->unref();  // strip 2: end color 0x00000000
+    canvas->drawRect(r, p);
+    canvas->drawRect(r, p2);
+    
+    r.offset(0, r.height() + SkIntToScalar(4));  // move down by one strip height plus 4
+    p.setShader(setgrad(r, 0xFF00FF00, 0x00FF0000))->unref();  // strip 3: end color 0x00FF0000
+    canvas->drawRect(r, p);
+    canvas->drawRect(r, p2);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
 struct GradData {
     int             fCount;
     const SkColor*  fColors;
@@ -87,7 +118,8 @@
         SkRect r = { 0, 0, SkIntToScalar(100), SkIntToScalar(100) };
         SkPaint paint;
         paint.setAntiAlias(true);
-        
+
+        canvas->save();
         canvas->translate(SkIntToScalar(20), SkIntToScalar(20));
         for (size_t i = 0; i < SK_ARRAY_COUNT(gGradData); i++) {
             canvas->save();
@@ -101,6 +133,10 @@
             canvas->restore();
             canvas->translate(SkIntToScalar(120), 0);
         }
+        canvas->restore();
+        
+        canvas->translate(0, SkIntToScalar(370));
+        test_alphagradients(canvas);
     }
     
 private:
diff --git a/src/core/SkSpriteBlitter_RGB16.cpp b/src/core/SkSpriteBlitter_RGB16.cpp
index a158637..b996ef7 100644
--- a/src/core/SkSpriteBlitter_RGB16.cpp
+++ b/src/core/SkSpriteBlitter_RGB16.cpp
@@ -172,6 +172,68 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+static intptr_t asint(const void* ptr) {  // returns the pointer's byte address as an integer; used below for alignment tests
+    return reinterpret_cast<const char*>(ptr) - (const char*)0;  // NOTE(review): subtracting a null pointer is formally undefined; reinterpret_cast<intptr_t>(ptr) would avoid it
+}
+
+static void blitrow_d16_si8(SK_RESTRICT uint16_t* dst,  // writes dst[i] = ctable[src[i]] for i in [0, count)
+                            SK_RESTRICT const uint8_t* src, int count,
+                            SK_RESTRICT const uint16_t* ctable) {
+    if (count <= 8) {  // short rows: plain one-pixel-at-a-time loop
+        do {  // NOTE(review): do/while assumes count >= 1; count == 0 would not terminate correctly
+            *dst++ = ctable[*src++];
+        } while (--count);
+        return;
+    }
+
+    // eat src until we're on a 4byte boundary, so it can be read as uint32_t below
+    while (asint(src) & 3) {
+        *dst++ = ctable[*src++];
+        count -= 1;
+    }
+
+    int qcount = count >> 2;  // number of whole 4-pixel groups left
+    SkASSERT(qcount > 0);  // count was > 8 and the loop above removes at most 3, so count >= 6 here
+    const uint32_t* qsrc = reinterpret_cast<const uint32_t*>(src);
+    if (asint(dst) & 2) {  // dst is not on a 4byte boundary: store each 16-bit pixel separately
+        do {
+            uint32_t s4 = *qsrc++;  // four 8-bit indices in one load
+#ifdef SK_CPU_LENDIAN
+            *dst++ = ctable[s4 & 0xFF];
+            *dst++ = ctable[(s4 >> 8) & 0xFF];
+            *dst++ = ctable[(s4 >> 16) & 0xFF];
+            *dst++ = ctable[s4 >> 24];
+#else   // BENDIAN
+            *dst++ = ctable[s4 >> 24];
+            *dst++ = ctable[(s4 >> 16) & 0xFF];
+            *dst++ = ctable[(s4 >> 8) & 0xFF];
+            *dst++ = ctable[s4 & 0xFF];
+#endif
+        } while (--qcount);
+    } else {    // dst is on a 4byte boundary: write two pixels per 32-bit store
+        uint32_t* ddst = reinterpret_cast<uint32_t*>(dst);
+        do {
+            uint32_t s4 = *qsrc++;  // four 8-bit indices in one load
+#ifdef SK_CPU_LENDIAN
+            *ddst++ = (ctable[(s4 >> 8) & 0xFF] << 16) | ctable[s4 & 0xFF];  // NOTE(review): uint16_t promotes to int before << 16 - confirm this is acceptable for entries >= 0x8000
+            *ddst++ = (ctable[s4 >> 24] << 16) | ctable[(s4 >> 16) & 0xFF];
+#else   // BENDIAN
+            *ddst++ = (ctable[s4 >> 24] << 16) | ctable[(s4 >> 16) & 0xFF];
+            *ddst++ = (ctable[(s4 >> 8) & 0xFF] << 16) | ctable[s4 & 0xFF];
+#endif
+        } while (--qcount);
+        dst = reinterpret_cast<uint16_t*>(ddst);  // resume 16-bit stores where the 32-bit stores stopped
+    }
+    src = reinterpret_cast<const uint8_t*>(qsrc);  // resume byte reads where the 32-bit loads stopped
+    count &= 3;  // pixels left over after the groups of four
+    // catch any remaining (will be < 4)
+    while (--count >= 0) {
+        *dst++ = ctable[*src++];
+    }
+}
+
+#define SkSPRITE_ROW_PROC(d, s, n, x, y)    blitrow_d16_si8(d, s, n, ctable)
+            
 #define SkSPRITE_CLASSNAME                  Sprite_D16_SIndex8_Opaque
 #define SkSPRITE_ARGS
 #define SkSPRITE_FIELDS
diff --git a/src/effects/SkGradientShader.cpp b/src/effects/SkGradientShader.cpp
index 0891a37..8065894 100644
--- a/src/effects/SkGradientShader.cpp
+++ b/src/effects/SkGradientShader.cpp
@@ -106,7 +106,6 @@
     SkMatrix    fPtsToUnit;     // set by subclass
     SkMatrix    fDstToIndex;
     SkMatrix::MapXYProc fDstToIndexProc;
-    SkPMColor*  fARGB32;
     TileMode    fTileMode;
     TileProc    fTileProc;
     int         fColorCount;
@@ -136,7 +135,7 @@
     enum {
         kColorStorageCount = 4, // more than this many colors, and we'll use sk_malloc for the space
 
-        kStorageSize = kColorStorageCount * (sizeof(SkColor) + sizeof(SkPMColor) + sizeof(Rec))
+        kStorageSize = kColorStorageCount * (sizeof(SkColor) + sizeof(Rec))
     };
     SkColor     fStorage[(kStorageSize + 3) >> 2];
     SkColor*    fOrigColors;
@@ -200,7 +199,7 @@
     }
 
     if (fColorCount > kColorStorageCount) {
-        size_t size = sizeof(SkColor) + sizeof(SkPMColor) + sizeof(Rec);
+        size_t size = sizeof(SkColor) + sizeof(Rec);
         fOrigColors = reinterpret_cast<SkColor*>(
                                         sk_malloc_throw(size * fColorCount));
     }
@@ -221,10 +220,7 @@
         }
     }
 
-    // our premul colors point to the 2nd half of the array
-    // these are assigned each time in setContext
-    fARGB32 = fOrigColors + fColorCount;
-    fRecs = (Rec*)(fARGB32 + fColorCount);
+    fRecs = (Rec*)(fOrigColors + fColorCount);
     if (fColorCount > 2) {
         Rec* recs = fRecs;
         recs->fPos = 0;
@@ -297,11 +293,10 @@
         fOrigColors = fStorage;
     }
     buffer.read(fOrigColors, colorCount * sizeof(SkColor));
-    fARGB32 = fOrigColors + colorCount;
 
     fTileMode = (TileMode)buffer.readU8();
     fTileProc = gTileProcs[fTileMode];
-    fRecs = (Rec*)(fARGB32 + colorCount);
+    fRecs = (Rec*)(fOrigColors + colorCount);
     if (colorCount > 2) {
         Rec* recs = fRecs;
         recs[0].fPos = 0;
@@ -363,15 +358,11 @@
     unsigned paintAlpha = this->getPaintAlpha();
     unsigned colorAlpha = 0xFF;
 
+    // should record colorAlpha in constructor
     for (int i = 0; i < fColorCount; i++) {
         SkColor src = fOrigColors[i];
         unsigned sa = SkColorGetA(src);
         colorAlpha &= sa;
-        
-        // now modulate it by the paint for our resulting ARGB32 array
-        sa = SkMulDiv255Round(sa, paintAlpha);
-        fARGB32[i] = SkPreMultiplyARGB(sa, SkColorGetR(src), SkColorGetG(src),
-                                       SkColorGetB(src));
     }
 
     fFlags = this->INHERITED::getFlags();
@@ -466,19 +457,24 @@
     } while (--count != 0);
 }
 
-static void build_32bit_cache(SkPMColor cache[], SkPMColor c0, SkPMColor c1,
-                              int count) {
+static void build_32bit_cache(SkPMColor cache[], SkColor c0, SkColor c1,
+                              int count, U8CPU paintAlpha) {
     SkASSERT(count > 1);
 
-    SkFixed a = SkGetPackedA32(c0);
-    SkFixed r = SkGetPackedR32(c0);
-    SkFixed g = SkGetPackedG32(c0);
-    SkFixed b = SkGetPackedB32(c0);
+    // need to apply paintAlpha to our two endpoints
+    SkFixed a = SkMulDiv255Round(SkColorGetA(c0), paintAlpha);
+    SkFixed da;
+    {
+        int tmp = SkMulDiv255Round(SkColorGetA(c1), paintAlpha);
+        da = SkIntToFixed(tmp - a) / (count - 1);
+    }
 
-    SkFixed da = SkIntToFixed(SkGetPackedA32(c1) - a) / (count - 1);
-    SkFixed dr = SkIntToFixed(SkGetPackedR32(c1) - r) / (count - 1);
-    SkFixed dg = SkIntToFixed(SkGetPackedG32(c1) - g) / (count - 1);
-    SkFixed db = SkIntToFixed(SkGetPackedB32(c1) - b) / (count - 1);
+    SkFixed r = SkColorGetR(c0);
+    SkFixed g = SkColorGetG(c0);
+    SkFixed b = SkColorGetB(c0);
+    SkFixed dr = SkIntToFixed(SkColorGetR(c1) - r) / (count - 1);
+    SkFixed dg = SkIntToFixed(SkColorGetG(c1) - g) / (count - 1);
+    SkFixed db = SkIntToFixed(SkColorGetB(c1) - b) / (count - 1);
 
     a = SkIntToFixed(a) + 0x8000;
     r = SkIntToFixed(r) + 0x8000;
@@ -486,7 +482,7 @@
     b = SkIntToFixed(b) + 0x8000;
 
     do {
-        *cache++ = SkPackARGB32(a >> 16, r >> 16, g >> 16, b >> 16);
+        *cache++ = SkPreMultiplyARGB(a >> 16, r >> 16, g >> 16, b >> 16);
         a += da;
         r += dr;
         g += dg;
@@ -559,7 +555,8 @@
 
         fCache32 = fCache32Storage;
         if (fColorCount == 2) {
-            build_32bit_cache(fCache32, fARGB32[0], fARGB32[1], kCache32Count);
+            build_32bit_cache(fCache32, fOrigColors[0], fOrigColors[1],
+                              kCache32Count, fCacheAlpha);
         } else {
             Rec* rec = fRecs;
             int prevIndex = 0;
@@ -568,7 +565,9 @@
                 SkASSERT(nextIndex < kCache32Count);
 
                 if (nextIndex > prevIndex)
-                    build_32bit_cache(fCache32 + prevIndex, fARGB32[i-1], fARGB32[i], nextIndex - prevIndex + 1);
+                    build_32bit_cache(fCache32 + prevIndex, fOrigColors[i-1],
+                                      fOrigColors[i],
+                                      nextIndex - prevIndex + 1, fCacheAlpha);
                 prevIndex = nextIndex;
             }
             SkASSERT(prevIndex == kCache32Count - 1);
diff --git a/xcode/sampleapp/SampleApp.xcodeproj/project.pbxproj b/xcode/sampleapp/SampleApp.xcodeproj/project.pbxproj
index ab18fcc..813db06 100644
--- a/xcode/sampleapp/SampleApp.xcodeproj/project.pbxproj
+++ b/xcode/sampleapp/SampleApp.xcodeproj/project.pbxproj
@@ -73,7 +73,7 @@
 		00A7295D0FD8397600D5051F /* SampleAll.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2762F6740FCCCB01002BD8B4 /* SampleAll.cpp */; };
 		00AF77B00FE2EA2D007F9650 /* SampleTestGL.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00A729630FD93ED600D5051F /* SampleTestGL.cpp */; };
 		00AF787E0FE94433007F9650 /* SamplePath.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00003C640EFC22A8000FF73A /* SamplePath.cpp */; };
-		00C55DA10F8552DC000CAC09 /* SampleGradients.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00C55DA00F8552DC000CAC09 /* SampleGradients.cpp */; };
+		00F53F480FFCFC4D003FA70A /* SampleGradients.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00C55DA00F8552DC000CAC09 /* SampleGradients.cpp */; };
 		00FF39140FC6ED2C00915187 /* SampleEffects.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00FF39130FC6ED2C00915187 /* SampleEffects.cpp */; };
 		0156F80407C56A3000C6122B /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0156F80307C56A3000C6122B /* Foundation.framework */; };
 		01FC44D507BD3BB800D228F4 /* Quartz.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 01FC44D407BD3BB800D228F4 /* Quartz.framework */; };
@@ -556,7 +556,6 @@
 				007A7CB30F01658C00A2D6EE /* SamplePicture.cpp in Sources */,
 				0041CE440F00A12400695E8C /* SampleLines.cpp in Sources */,
 				008C4D980F77DAEE0056981C /* SampleHairline.cpp in Sources */,
-				00C55DA10F8552DC000CAC09 /* SampleGradients.cpp in Sources */,
 				009490320FB0A5B90063C792 /* SampleLayerMask.cpp in Sources */,
 				007A7CB60F01658C00A2D6EE /* SampleRegion.cpp in Sources */,
 				00FF39140FC6ED2C00915187 /* SampleEffects.cpp in Sources */,
@@ -575,6 +574,7 @@
 				005E92E00FF08512008965B9 /* SampleFilter.cpp in Sources */,
 				0057785F0FF17CCC00582CD9 /* SampleMipMap.cpp in Sources */,
 				005778B40FF5616F00582CD9 /* SampleShapes.cpp in Sources */,
+				00F53F480FFCFC4D003FA70A /* SampleGradients.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};