Scanline decoding for bmp

Redesigns SkScanlineDecoder.h to indicate the ordering
in which the scanlines are provided

Refactors SkSwizzler::Fill() to include the zeroInit check
and to actually be correct.

BUG=skia:3257
BUG=skia:4198

Review URL: https://codereview.chromium.org/1287423002
diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp
index 1253014..e66a452 100644
--- a/dm/DMSrcSink.cpp
+++ b/dm/DMSrcSink.cpp
@@ -79,6 +79,26 @@
         || flags.approach != SinkFlags::kDirect;
 }
 
+SkScanlineDecoder* start_scanline_decoder(SkData* encoded, const SkImageInfo& info,
+        SkPMColor* colorPtr, int* colorCountPtr) {
+    SkAutoTDelete<SkScanlineDecoder> scanlineDecoder(SkScanlineDecoder::NewFromData(encoded));
+    if (nullptr == scanlineDecoder) {
+        return nullptr;
+    }
+    // DM scanline tests assume kTopDown scanline ordering.  Other orderings are
+    // tested from within SkScaledCodec.
+    // TODO (msarett): Redesign the CodecSrc tests to improve our coverage of SkCodec and
+    //                 SkScanlineDecoder functionality.  Maybe we should write code to explicitly
+    //                 test kNone, kOutOfOrder, and kBottomUp.
+    if (SkScanlineDecoder::kTopDown_SkScanlineOrder != scanlineDecoder->getScanlineOrder()) {
+        return nullptr;
+    }
+    if (SkCodec::kSuccess != scanlineDecoder->start(info, NULL, colorPtr, colorCountPtr)) {
+        return nullptr;
+    }
+    return scanlineDecoder.detach();
+}
+
 Error CodecSrc::draw(SkCanvas* canvas) const {
     SkAutoTUnref<SkData> encoded(SkData::NewFromFileName(fPath.c_str()));
     if (!encoded) {
@@ -170,10 +190,9 @@
         }
         case kScanline_Mode: {
             SkAutoTDelete<SkScanlineDecoder> scanlineDecoder(
-                    SkScanlineDecoder::NewFromData(encoded));
-            if (nullptr == scanlineDecoder || SkCodec::kSuccess !=
-                    scanlineDecoder->start(decodeInfo, nullptr, colorPtr, colorCountPtr)) {
-                return Error::Nonfatal("Cannot use scanline decoder for all images");
+                    start_scanline_decoder(encoded.get(), decodeInfo, colorPtr, colorCountPtr));
+            if (nullptr == scanlineDecoder) {
+                return Error::Nonfatal("Could not start top-down scanline decoder");
             }
 
             const SkCodec::Result result = scanlineDecoder->getScanlines(
@@ -234,14 +253,12 @@
                     const int y = row * subsetHeight;
                     //create scanline decoder for each subset
                     SkAutoTDelete<SkScanlineDecoder> subsetScanlineDecoder(
-                            SkScanlineDecoder::NewFromData(encoded));
-                    if (nullptr == subsetScanlineDecoder || SkCodec::kSuccess !=
-                            subsetScanlineDecoder->start(
-                            decodeInfo, nullptr, colorPtr, colorCountPtr))
-                    {
+                            start_scanline_decoder(encoded.get(), decodeInfo,
+                                    colorPtr, colorCountPtr));
+                    if (nullptr == subsetScanlineDecoder) {
                         if (x == 0 && y == 0) {
                             //first try, image may not be compatible
-                            return Error::Nonfatal("Cannot use scanline decoder for all images");
+                            return Error::Nonfatal("Could not start top-down scanline decoder");
                         } else {
                             return "Error scanline decoder is nullptr";
                         }
@@ -304,10 +321,10 @@
             const int numStripes = (height + stripeHeight - 1) / stripeHeight;
 
             // Decode odd stripes
-            SkAutoTDelete<SkScanlineDecoder> decoder(SkScanlineDecoder::NewFromData(encoded));
-            if (nullptr == decoder || SkCodec::kSuccess !=
-                    decoder->start(decodeInfo, nullptr, colorPtr, colorCountPtr)) {
-                return Error::Nonfatal("Cannot use scanline decoder for all images");
+            SkAutoTDelete<SkScanlineDecoder> decoder(
+                    start_scanline_decoder(encoded.get(), decodeInfo, colorPtr, colorCountPtr));
+            if (nullptr == decoder) {
+                return Error::Nonfatal("Could not start top-down scanline decoder");
             }
             for (int i = 0; i < numStripes; i += 2) {
                 // Skip a stripe
diff --git a/include/codec/SkScanlineDecoder.h b/include/codec/SkScanlineDecoder.h
index 7946973..61184d6 100644
--- a/include/codec/SkScanlineDecoder.h
+++ b/include/codec/SkScanlineDecoder.h
@@ -153,21 +153,88 @@
     SkEncodedFormat getEncodedFormat() const { return this->onGetEncodedFormat(); }
 
     /**
-     * returns true if the image must be scaled, in the y direction, after reading, not during.
-     * To scale afterwards, we first decode every line and then sample the lines we want afterwards.
-     * An example is interlaced pngs, where calling getScanlines once (regardless of the count
-     * used) needs to read the entire image, therefore it is inefficient to call
-     * getScanlines more than once. Instead, it should only ever be called with all the
-     * rows needed.
+     *  The order in which rows are output from the scanline decoder is not the
+     *  same for all variations of all image types.  This explains the possible
+     *  output row orderings.
      */
-    bool requiresPostYSampling() {
-        return this->onRequiresPostYSampling();
+    enum SkScanlineOrder {
+        /*
+         * By far the most common, this indicates that the image can be decoded
+         * reliably using the scanline decoder, and that rows will be output in
+         * the logical order.
+         */
+        kTopDown_SkScanlineOrder,
+
+        /*
+         * This indicates that the scanline decoder reliably outputs rows, but
+         * they will be returned in reverse order.  If the scanline order is
+         * kBottomUp, the getY() API can be used to determine the actual
+         * y-coordinate of the next output row, but the client is not forced
+         * to take advantage of this, given that it's not too tough to keep
+         * track independently.
+         *
+         * For full image decodes, it is safe to get all of the scanlines at
+         * once, since the decoder will handle inverting the rows as it
+         * decodes.
+         *
+         * For subset decodes and sampling, it is simplest to get and skip
+         * scanlines one at a time, using the getY() API.  It is possible to
+         * ask for larger chunks at a time, but this should be used with
+         * caution.  As with full image decodes, the decoder will handle
+         * inverting the requested rows, but rows will still be delivered
+         * starting from the bottom of the image.
+         *
+         * Upside down bmps are an example.
+         */
+        kBottomUp_SkScanlineOrder,
+
+        /*
+         * This indicates that the scanline decoder reliably outputs rows, but
+         * they will not be in logical order.  If the scanline order is
+         * kOutOfOrder, the getY() API should be used to determine the actual
+         * y-coordinate of the next output row.
+         *
+         * For this scanline ordering, it is advisable to get and skip
+         * scanlines one at a time.
+         *
+         * Interlaced gifs are an example.
+         */
+        kOutOfOrder_SkScanlineOrder,
+
+        /*
+         * Indicates that the entire image must be decoded in order to output
+         * any number of scanlines.  In this case, it is a REALLY BAD IDEA to
+         * request scanlines 1-by-1 or in small chunks.  The client should
+         * determine which scanlines are needed and ask for all of them in
+         * a single call to getScanlines().
+         *
+         * Interlaced pngs are an example.
+         */
+        kNone_SkScanlineOrder,
+    };
+
+    /**
+     *  An enum representing the order in which scanlines will be returned by
+     *  the scanline decoder.
+     */
+    SkScanlineOrder getScanlineOrder() const { return this->onGetScanlineOrder(); }
+
+    /**
+     *  Returns the y-coordinate of the next row to be returned by the scanline
+     *  decoder.  This will be overridden in the cases of kBottomUp_SkScanlineOrder
+     *  and kOutOfOrder_SkScanlineOrder, and must not be called in the case of
+     *  kNone_SkScanlineOrder (getY() asserts against it).
+     */
+    int getY() const {
+        SkASSERT(kNone_SkScanlineOrder != this->getScanlineOrder());
+        return this->onGetY();
     }
 
 protected:
     SkScanlineDecoder(const SkImageInfo& srcInfo)
         : fSrcInfo(srcInfo)
         , fDstInfo()
+        , fOptions()
         , fCurrScanline(0) {}
 
     virtual SkISize onGetScaledDimensions(float /* desiredScale */) {
@@ -180,16 +247,23 @@
     virtual bool onReallyHasAlpha() const { return false; }
 
     /**
-     * returns true if the image type is hard to sample and must be scaled after reading, not during
-     * An example is interlaced pngs, where the entire image must be read for each decode
+     *  Most image types will be kTopDown and will not need to override this function.
      */
-    virtual bool onRequiresPostYSampling() { return false; }
+    virtual SkScanlineOrder onGetScanlineOrder() const { return kTopDown_SkScanlineOrder; }
+
+    /**
+     *  Most images will be kTopDown and will not need to override this function.
+     */
+    virtual int onGetY() const { return fCurrScanline; }
 
     const SkImageInfo& dstInfo() const { return fDstInfo; }
 
+    const SkCodec::Options& options() const { return fOptions; }
+
 private:
     const SkImageInfo   fSrcInfo;
     SkImageInfo         fDstInfo;
+    SkCodec::Options    fOptions;
     int                 fCurrScanline;
 
     virtual SkCodec::Result onStart(const SkImageInfo& dstInfo,
diff --git a/src/codec/SkBmpCodec.cpp b/src/codec/SkBmpCodec.cpp
index f2f6a8e..8d21e1d 100644
--- a/src/codec/SkBmpCodec.cpp
+++ b/src/codec/SkBmpCodec.cpp
@@ -11,6 +11,7 @@
 #include "SkBmpStandardCodec.h"
 #include "SkCodecPriv.h"
 #include "SkColorPriv.h"
+#include "SkScaledCodec.h"
 #include "SkStream.h"
 
 /*
@@ -261,10 +262,10 @@
     }
 
     // Check for valid dimensions from header
-    RowOrder rowOrder = kBottomUp_RowOrder;
+    SkScanlineDecoder::SkScanlineOrder rowOrder = SkScanlineDecoder::kBottomUp_SkScanlineOrder;
     if (height < 0) {
         height = -height;
-        rowOrder = kTopDown_RowOrder;
+        rowOrder = SkScanlineDecoder::kTopDown_SkScanlineOrder;
     }
     // The height field for bmp in ico is double the actual height because they
     // contain an XOR mask followed by an AND mask
@@ -526,7 +527,7 @@
 }
 
 SkBmpCodec::SkBmpCodec(const SkImageInfo& info, SkStream* stream,
-        uint16_t bitsPerPixel, RowOrder rowOrder)
+        uint16_t bitsPerPixel, SkScanlineDecoder::SkScanlineOrder rowOrder)
     : INHERITED(info, stream)
     , fBitsPerPixel(bitsPerPixel)
     , fRowOrder(rowOrder)
@@ -536,6 +537,14 @@
     return SkBmpCodec::ReadHeader(this->stream(), this->inIco(), nullptr);
 }
 
+int32_t SkBmpCodec::getDstRow(int32_t y, int32_t height) {
+    if (SkScanlineDecoder::kTopDown_SkScanlineOrder == fRowOrder) {
+        return y;
+    }
+    SkASSERT(SkScanlineDecoder::kBottomUp_SkScanlineOrder == fRowOrder);
+    return height - y - 1;
+}
+
 /*
  * Get the destination row to start filling from
  * Used to fill the remainder of the image on incomplete input for bmps
@@ -544,7 +553,8 @@
  * filling at the top of the image.
  */
 void* SkBmpCodec::getDstStartRow(void* dst, size_t dstRowBytes, int32_t y) const {
-    return (kTopDown_RowOrder == fRowOrder) ? SkTAddOffset<void*>(dst, y * dstRowBytes) : dst;
+    return (SkScanlineDecoder::kTopDown_SkScanlineOrder == fRowOrder) ?
+            SkTAddOffset<void*>(dst, y * dstRowBytes) : dst;
 }
 
 /*
@@ -559,3 +569,72 @@
     }
     return numColors;
 }
+
+/*
+ * Scanline decoder for bmps
+ */
+class SkBmpScanlineDecoder : public SkScanlineDecoder {
+public:
+    SkBmpScanlineDecoder(SkBmpCodec* codec)
+        : INHERITED(codec->getInfo())
+        , fCodec(codec)
+    {}
+
+    SkEncodedFormat onGetEncodedFormat() const override {
+        return kBMP_SkEncodedFormat;
+    }
+
+    SkCodec::Result onStart(const SkImageInfo& dstInfo, const SkCodec::Options& options,
+                            SkPMColor inputColorPtr[], int* inputColorCount) override {
+        if (!fCodec->rewindIfNeeded()) {
+            return SkCodec::kCouldNotRewind;
+        }
+        if (options.fSubset) {
+            // Subsets are not supported.
+            return SkCodec::kUnimplemented;
+        }
+        if (dstInfo.dimensions() != this->getInfo().dimensions()) {
+            if (!SkScaledCodec::DimensionsSupportedForSampling(this->getInfo(), dstInfo)) {
+                return SkCodec::kInvalidScale;
+            }
+        }
+        if (!conversion_possible(dstInfo, this->getInfo())) {
+            SkCodecPrintf("Error: cannot convert input type to output type.\n");
+            return SkCodec::kInvalidConversion;
+        }
+
+        return fCodec->prepareToDecode(dstInfo, options, inputColorPtr, inputColorCount);
+    }
+
+    SkCodec::Result onGetScanlines(void* dst, int count, size_t rowBytes) override {
+        // Create a new image info representing the portion of the image to decode
+        SkImageInfo rowInfo = this->dstInfo().makeWH(this->dstInfo().width(), count);
+
+        // Decode the requested rows
+        return fCodec->decodeRows(rowInfo, dst, rowBytes, this->options());
+    }
+
+    SkScanlineOrder onGetScanlineOrder() const override {
+        return fCodec->fRowOrder;
+    }
+
+    int onGetY() const override {
+        return fCodec->getDstRow(this->INHERITED::onGetY(), this->dstInfo().height());
+    }
+
+    // TODO(msarett): Override default skipping with something more clever.
+
+private:
+    SkAutoTDelete<SkBmpCodec> fCodec;
+
+    typedef SkScanlineDecoder INHERITED;
+};
+
+SkScanlineDecoder* SkBmpCodec::NewSDFromStream(SkStream* stream) {
+    SkAutoTDelete<SkBmpCodec> codec(static_cast<SkBmpCodec*>(SkBmpCodec::NewFromStream(stream)));
+    if (!codec) {
+        return NULL;
+    }
+
+    return SkNEW_ARGS(SkBmpScanlineDecoder, (codec.detach()));
+}
diff --git a/src/codec/SkBmpCodec.h b/src/codec/SkBmpCodec.h
index ee14f89..4b2cd2a 100644
--- a/src/codec/SkBmpCodec.h
+++ b/src/codec/SkBmpCodec.h
@@ -11,6 +11,7 @@
 #include "SkColorTable.h"
 #include "SkImageInfo.h"
 #include "SkMaskSwizzler.h"
+#include "SkScanlineDecoder.h"
 #include "SkStream.h"
 #include "SkSwizzler.h"
 #include "SkTypes.h"
@@ -23,14 +24,6 @@
 public:
 
     /*
-     * Describes if rows of the input start at the top or bottom of the image
-     */
-    enum RowOrder {
-        kTopDown_RowOrder,
-        kBottomUp_RowOrder
-    };
-
-    /*
      * Checks the start of the stream to see if the image is a bmp
      */
     static bool IsBmp(SkStream*);
@@ -48,10 +41,17 @@
      */
     static SkCodec* NewFromIco(SkStream*);
 
+    /*
+     * Assumes IsBmp was called and returned true
+     * Creates a bmp scanline decoder
+     * Takes ownership of the stream
+     */
+    static SkScanlineDecoder* NewSDFromStream(SkStream* stream);
+
 protected:
 
     SkBmpCodec(const SkImageInfo& info, SkStream* stream, uint16_t bitsPerPixel,
-            RowOrder rowOrder);
+            SkScanlineDecoder::SkScanlineOrder rowOrder);
 
     SkEncodedFormat onGetEncodedFormat() const override { return kBMP_SkEncodedFormat; }
 
@@ -77,6 +77,19 @@
     }
 
     /*
+     * Get the destination row number corresponding to the encoded row number.
+     * For kTopDown, we simply return y, but for kBottomUp, the rows will be
+     * decoded in reverse order.
+     *
+     * @param y      Iterates from 0 to height, indicating the current row.
+     * @param height The height of the current subset of the image that we are
+     *               decoding.  This is generally equal to the full height
+     *               when we want to decode the full image, or one when we
+     *               are sampling.
+     */
+    int32_t getDstRow(int32_t y, int32_t height);
+
+    /*
      * Get the destination row to start filling from
      * Used to fill the remainder of the image on incomplete input for bmps
      * This is tricky since bmps may be kTopDown or kBottomUp.  For kTopDown,
@@ -94,7 +107,26 @@
      * Accessors used by subclasses
      */
     uint16_t bitsPerPixel() const { return fBitsPerPixel; }
-    RowOrder rowOrder() const { return fRowOrder; }
+    SkScanlineDecoder::SkScanlineOrder rowOrder() const { return fRowOrder; }
+
+    /*
+     * To be overridden by bmp subclasses, which provide unique implementations.
+     * Performs subclass specific setup.
+     *
+     * @param dstInfo         Contains output information.  Height specifies
+     *                        the total number of rows that will be decoded.
+     * @param options         Additional options to pass to the decoder.
+     * @param inputColorPtr   Client-provided memory for a color table.  Must
+     *                        be enough for 256 colors.  This will be
+     *                        populated with colors if the encoded image uses
+     *                        a color table.
+     * @param inputColorCount If the encoded image uses a color table, this
+     *                        will be set to the number of colors in the
+     *                        color table.
+     */
+    virtual SkCodec::Result prepareToDecode(const SkImageInfo& dstInfo,
+            const SkCodec::Options& options, SkPMColor inputColorPtr[],
+            int* inputColorCount) = 0;
 
 private:
 
@@ -104,8 +136,27 @@
      */
     static SkCodec* NewFromStream(SkStream*, bool inIco);
 
-    const uint16_t fBitsPerPixel;
-    const RowOrder fRowOrder;
+    /*
+     * Decodes the next dstInfo.height() lines.
+     *
+     * onGetPixels() uses this for full image decodes.
+     * SkScaledCodec::onGetPixels() uses the scanline decoder to call this with
+     * dstInfo.height() = 1, in order to implement sampling.
+     * A potential future use is to allow the caller to decode a subset of the
+     * lines in the image.
+     *
+     * @param dstInfo     Contains output information.  Height specifies the
+     *                    number of rows to decode at this time.
+     * @param dst         Memory location to store output pixels
+     * @param dstRowBytes Bytes in a row of the destination
+     */
+    virtual Result decodeRows(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes,
+            const Options& opts) = 0;
+
+    const uint16_t                           fBitsPerPixel;
+    const SkScanlineDecoder::SkScanlineOrder fRowOrder;
+
+    friend class SkBmpScanlineDecoder;
 
     typedef SkCodec INHERITED;
 };
diff --git a/src/codec/SkBmpMaskCodec.cpp b/src/codec/SkBmpMaskCodec.cpp
index 5684b68..3036f33 100644
--- a/src/codec/SkBmpMaskCodec.cpp
+++ b/src/codec/SkBmpMaskCodec.cpp
@@ -14,11 +14,12 @@
  */
 SkBmpMaskCodec::SkBmpMaskCodec(const SkImageInfo& info, SkStream* stream,
                                uint16_t bitsPerPixel, SkMasks* masks,
-                               SkBmpCodec::RowOrder rowOrder)
+                               SkScanlineDecoder::SkScanlineOrder rowOrder)
     : INHERITED(info, stream, bitsPerPixel, rowOrder)
     , fMasks(masks)
     , fMaskSwizzler(nullptr)
-    , fSrcBuffer(nullptr)
+    , fSrcRowBytes(SkAlign4(compute_row_bytes(this->getInfo().width(), this->bitsPerPixel())))
+    , fSrcBuffer(new uint8_t [fSrcRowBytes])
 {}
 
 /*
@@ -46,23 +47,18 @@
         return kInvalidConversion;
     }
 
-    // Initialize a the mask swizzler
-    if (!this->initializeSwizzler(dstInfo)) {
-        SkCodecPrintf("Error: cannot initialize swizzler.\n");
-        return kInvalidConversion;
+    Result result = this->prepareToDecode(dstInfo, opts, inputColorPtr, inputColorCount);
+    if (kSuccess != result) {
+        return result;
     }
 
-    return this->decode(dstInfo, dst, dstRowBytes, opts);
+    return this->decodeRows(dstInfo, dst, dstRowBytes, opts);
 }
 
 bool SkBmpMaskCodec::initializeSwizzler(const SkImageInfo& dstInfo) {
-    // Allocate space for a row buffer
-    const size_t rowBytes = SkAlign4(compute_row_bytes(dstInfo.width(), this->bitsPerPixel()));
-    fSrcBuffer.reset(new uint8_t[rowBytes]);
-
     // Create the swizzler
     fMaskSwizzler.reset(SkMaskSwizzler::CreateMaskSwizzler(
-            dstInfo, fMasks, this->bitsPerPixel()));
+            dstInfo, this->getInfo(), fMasks, this->bitsPerPixel()));
 
     if (nullptr == fMaskSwizzler.get()) {
         return false;
@@ -71,36 +67,40 @@
     return true;
 }
 
+SkCodec::Result SkBmpMaskCodec::prepareToDecode(const SkImageInfo& dstInfo,
+        const SkCodec::Options& options, SkPMColor inputColorPtr[], int* inputColorCount) {
+    // Initialize the mask swizzler
+    if (!this->initializeSwizzler(dstInfo)) {
+        SkCodecPrintf("Error: cannot initialize swizzler.\n");
+        return SkCodec::kInvalidConversion;
+    }
+
+    return SkCodec::kSuccess;
+}
+
 /*
  * Performs the decoding
  */
-SkCodec::Result SkBmpMaskCodec::decode(const SkImageInfo& dstInfo,
-                                       void* dst, size_t dstRowBytes,
-                                       const Options& opts) {
-    // Set constant values
-    const int width = dstInfo.width();
-    const int height = dstInfo.height();
-    const size_t rowBytes = SkAlign4(compute_row_bytes(width, this->bitsPerPixel()));
-
+SkCodec::Result SkBmpMaskCodec::decodeRows(const SkImageInfo& dstInfo,
+                                           void* dst, size_t dstRowBytes,
+                                           const Options& opts) {
     // Iterate over rows of the image
     uint8_t* srcRow = fSrcBuffer.get();
+    const int height = dstInfo.height();
     for (int y = 0; y < height; y++) {
         // Read a row of the input
-        if (this->stream()->read(srcRow, rowBytes) != rowBytes) {
+        if (this->stream()->read(srcRow, fSrcRowBytes) != fSrcRowBytes) {
             SkCodecPrintf("Warning: incomplete input stream.\n");
             // Fill the destination image on failure
-            SkPMColor fillColor = dstInfo.alphaType() == kOpaque_SkAlphaType ?
-                    SK_ColorBLACK : SK_ColorTRANSPARENT;
-            if (kNo_ZeroInitialized == opts.fZeroInitialized || 0 != fillColor) {
-                void* dstStart = this->getDstStartRow(dst, dstRowBytes, y);
-                SkSwizzler::Fill(dstStart, dstInfo, dstRowBytes, dstInfo.height() - y, fillColor,
-                        nullptr);
-            }
+            void* dstStart = this->getDstStartRow(dst, dstRowBytes, y);
+            uint32_t fillColor = get_fill_color_or_index(dstInfo.alphaType());
+            SkSwizzler::Fill(dstStart, dstInfo, dstRowBytes, height - y,
+                    fillColor, nullptr, opts.fZeroInitialized);
             return kIncompleteInput;
         }
 
         // Decode the row in destination format
-        int row = SkBmpCodec::kBottomUp_RowOrder == this->rowOrder() ? height - 1 - y : y;
+        uint32_t row = this->getDstRow(y, height);
         void* dstRow = SkTAddOffset<void>(dst, row * dstRowBytes);
         fMaskSwizzler->swizzle(dstRow, srcRow);
     }
diff --git a/src/codec/SkBmpMaskCodec.h b/src/codec/SkBmpMaskCodec.h
index 6f43bb3..58b2e6b 100644
--- a/src/codec/SkBmpMaskCodec.h
+++ b/src/codec/SkBmpMaskCodec.h
@@ -29,7 +29,8 @@
      * @param rowOrder indicates whether rows are ordered top-down or bottom-up
      */
     SkBmpMaskCodec(const SkImageInfo& srcInfo, SkStream* stream,
-                   uint16_t bitsPerPixel, SkMasks* masks, RowOrder rowOrder);
+            uint16_t bitsPerPixel, SkMasks* masks,
+            SkScanlineDecoder::SkScanlineOrder rowOrder);
 
 protected:
 
@@ -37,15 +38,20 @@
                        size_t dstRowBytes, const Options&, SkPMColor*,
                        int*) override;
 
+    SkCodec::Result prepareToDecode(const SkImageInfo& dstInfo,
+            const SkCodec::Options& options, SkPMColor inputColorPtr[],
+            int* inputColorCount) override;
+
 private:
 
     bool initializeSwizzler(const SkImageInfo& dstInfo);
 
-    Result decode(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes,
-                  const Options& opts);
+    Result decodeRows(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes,
+                      const Options& opts) override;
 
     SkAutoTDelete<SkMasks>              fMasks;        // owned
     SkAutoTDelete<SkMaskSwizzler>       fMaskSwizzler;
+    const size_t                        fSrcRowBytes;
     SkAutoTDeleteArray<uint8_t>         fSrcBuffer;
 
     typedef SkBmpCodec INHERITED;
diff --git a/src/codec/SkBmpRLECodec.cpp b/src/codec/SkBmpRLECodec.cpp
index 1bd1409..58c0605 100644
--- a/src/codec/SkBmpRLECodec.cpp
+++ b/src/codec/SkBmpRLECodec.cpp
@@ -8,6 +8,7 @@
 #include "SkBmpRLECodec.h"
 #include "SkCodecPriv.h"
 #include "SkColorPriv.h"
+#include "SkScaledCodec.h"
 #include "SkScanlineDecoder.h"
 #include "SkStream.h"
 
@@ -15,13 +16,10 @@
  * Creates an instance of the decoder
  * Called only by NewFromStream
  */
-SkBmpRLECodec::SkBmpRLECodec(const SkImageInfo& info,
-                             SkStream* stream,
-                             uint16_t bitsPerPixel,
-                             uint32_t numColors,
-                             uint32_t bytesPerColor,
-                             uint32_t offset,
-                             SkBmpCodec::RowOrder rowOrder,
+SkBmpRLECodec::SkBmpRLECodec(const SkImageInfo& info, SkStream* stream,
+                             uint16_t bitsPerPixel, uint32_t numColors,
+                             uint32_t bytesPerColor, uint32_t offset,
+                             SkScanlineDecoder::SkScanlineOrder rowOrder,
                              size_t RLEBytes)
     : INHERITED(info, stream, bitsPerPixel, rowOrder)
     , fColorTable(nullptr)
@@ -30,16 +28,18 @@
     , fOffset(offset)
     , fStreamBuffer(new uint8_t[RLEBytes])
     , fRLEBytes(RLEBytes)
-    , fCurrRLEByte(0) {}
+    , fCurrRLEByte(0)
+    , fSampleX(1)
+{}
 
 /*
  * Initiates the bitmap decode
  */
 SkCodec::Result SkBmpRLECodec::onGetPixels(const SkImageInfo& dstInfo,
-                                        void* dst, size_t dstRowBytes,
-                                        const Options& opts,
-                                        SkPMColor* inputColorPtr,
-                                        int* inputColorCount) {
+                                           void* dst, size_t dstRowBytes,
+                                           const Options& opts,
+                                           SkPMColor* inputColorPtr,
+                                           int* inputColorCount) {
     if (!this->rewindIfNeeded()) {
         return kCouldNotRewind;
     }
@@ -56,24 +56,13 @@
         return kInvalidConversion;
     }
 
-    // Create the color table if necessary and prepare the stream for decode
-    // Note that if it is non-nullptr, inputColorCount will be modified
-    if (!this->createColorTable(inputColorCount)) {
-        SkCodecPrintf("Error: could not create color table.\n");
-        return kInvalidInput;
-    }
-
-    // Copy the color table to the client if necessary
-    copy_color_table(dstInfo, fColorTable, inputColorPtr, inputColorCount);
-
-    // Initialize a swizzler if necessary
-    if (!this->initializeStreamBuffer()) {
-        SkCodecPrintf("Error: cannot initialize swizzler.\n");
-        return kInvalidConversion;
+    Result result = this->prepareToDecode(dstInfo, opts, inputColorPtr, inputColorCount);
+    if (kSuccess != result) {
+        return result;
     }
 
     // Perform the decode
-    return decode(dstInfo, dst, dstRowBytes, opts);
+    return this->decodeRows(dstInfo, dst, dstRowBytes, opts);
 }
 
 /*
@@ -144,6 +133,15 @@
 
 bool SkBmpRLECodec::initializeStreamBuffer() {
     // Setup a buffer to contain the full input stream
+    // TODO (msarett): I'm not sure it is smart or optimal to trust fRLEBytes (read from header)
+    //                 as the size of our buffer.  First of all, the decode fails if fRLEBytes is
+    //                 corrupt (negative, zero, or small) when we might be able to decode
+    //                 successfully with a fixed size buffer.  Additionally, we would save memory
+    //                 using a fixed size buffer if the RLE encoding is large.  On the other hand,
+    //                 we may also waste memory with a fixed size buffer.  And determining a
+    //                 minimum size for our buffer would depend on the image width (so it's not
+    //                 really "fixed" size), and we may end up allocating a buffer that is
+    //                 generally larger than the average encoded size anyway.
     size_t totalBytes = this->stream()->read(fStreamBuffer.get(), fRLEBytes);
     if (totalBytes < fRLEBytes) {
         fRLEBytes = totalBytes;
@@ -153,6 +151,7 @@
         SkCodecPrintf("Error: could not read RLE image data.\n");
         return false;
     }
+    fCurrRLEByte = 0;
     return true;
 }
 
@@ -197,32 +196,29 @@
 void SkBmpRLECodec::setPixel(void* dst, size_t dstRowBytes,
                              const SkImageInfo& dstInfo, uint32_t x, uint32_t y,
                              uint8_t index) {
-    // Set the row
-    int height = dstInfo.height();
-    int row;
-    if (SkBmpCodec::kBottomUp_RowOrder == this->rowOrder()) {
-        row = height - y - 1;
-    } else {
-        row = y;
-    }
+    if (is_coord_necessary(x, fSampleX, dstInfo.width())) {
+        // Set the row
+        uint32_t row = this->getDstRow(y, dstInfo.height());
 
-    // Set the pixel based on destination color type
-    switch (dstInfo.colorType()) {
-        case kN32_SkColorType: {
-            SkPMColor* dstRow = SkTAddOffset<SkPMColor>(dst, row * (int) dstRowBytes);
-            dstRow[x] = fColorTable->operator[](index);
-            break;
+        // Set the pixel based on destination color type
+        const int dstX = get_dst_coord(x, fSampleX);
+        switch (dstInfo.colorType()) {
+            case kN32_SkColorType: {
+                SkPMColor* dstRow = SkTAddOffset<SkPMColor>(dst, row * (int) dstRowBytes);
+                dstRow[dstX] = fColorTable->operator[](index);
+                break;
+            }
+            case kRGB_565_SkColorType: {
+                uint16_t* dstRow = SkTAddOffset<uint16_t>(dst, row * (int) dstRowBytes);
+                dstRow[dstX] = SkPixel32ToPixel16(fColorTable->operator[](index));
+                break;
+            }
+            default:
+                // This case should not be reached.  We should catch an invalid
+                // color type when we check that the conversion is possible.
+                SkASSERT(false);
+                break;
         }
-        case kRGB_565_SkColorType: {
-            uint16_t* dstRow = SkTAddOffset<uint16_t>(dst, row * (int) dstRowBytes);
-            dstRow[x] = SkPixel32ToPixel16(fColorTable->operator[](index));
-            break;
-        }
-        default:
-            // This case should not be reached.  We should catch an invalid
-            // color type when we check that the conversion is possible.
-            SkASSERT(false);
-            break;
     }
 }
 
@@ -233,42 +229,62 @@
                                 const SkImageInfo& dstInfo, uint32_t x,
                                 uint32_t y, uint8_t red, uint8_t green,
                                 uint8_t blue) {
-    // Set the row
-    int height = dstInfo.height();
-    int row;
-    if (SkBmpCodec::kBottomUp_RowOrder == this->rowOrder()) {
-        row = height - y - 1;
-    } else {
-        row = y;
+    if (is_coord_necessary(x, fSampleX, dstInfo.width())) {
+        // Set the row
+        uint32_t row = this->getDstRow(y, dstInfo.height());
+
+        // Set the pixel based on destination color type
+        const int dstX = get_dst_coord(x, fSampleX);
+        switch (dstInfo.colorType()) {
+            case kN32_SkColorType: {
+                SkPMColor* dstRow = SkTAddOffset<SkPMColor>(dst, row * (int) dstRowBytes);
+                dstRow[dstX] = SkPackARGB32NoCheck(0xFF, red, green, blue);
+                break;
+            }
+            case kRGB_565_SkColorType: {
+                uint16_t* dstRow = SkTAddOffset<uint16_t>(dst, row * (int) dstRowBytes);
+                dstRow[dstX] = SkPack888ToRGB16(red, green, blue);
+                break;
+            }
+            default:
+                // This case should not be reached.  We should catch an invalid
+                // color type when we check that the conversion is possible.
+                SkASSERT(false);
+                break;
+        }
+    }
+}
+
+SkCodec::Result SkBmpRLECodec::prepareToDecode(const SkImageInfo& dstInfo,
+        const SkCodec::Options& options, SkPMColor inputColorPtr[], int* inputColorCount) {
+    // Create the color table if necessary and prepare the stream for decode
+    // Note that if inputColorCount is non-NULL, the value it points to will be modified
+    if (!this->createColorTable(inputColorCount)) {
+        SkCodecPrintf("Error: could not create color table.\n");
+        return SkCodec::kInvalidInput;
     }
 
-    // Set the pixel based on destination color type
-    switch (dstInfo.colorType()) {
-        case kN32_SkColorType: {
-            SkPMColor* dstRow = SkTAddOffset<SkPMColor>(dst, row * (int) dstRowBytes);
-            dstRow[x] = SkPackARGB32NoCheck(0xFF, red, green, blue);
-            break;
-        }
-        case kRGB_565_SkColorType: {
-            uint16_t* dstRow = SkTAddOffset<uint16_t>(dst, row * (int) dstRowBytes);
-            dstRow[x] = SkPack888ToRGB16(red, green, blue);
-            break;
-        }
-        default:
-            // This case should not be reached.  We should catch an invalid
-            // color type when we check that the conversion is possible.
-            SkASSERT(false);
-            break;
+    // Copy the color table to the client if necessary
+    copy_color_table(dstInfo, this->fColorTable, inputColorPtr, inputColorCount);
+
+    // Initialize a buffer for encoded RLE data
+    if (!this->initializeStreamBuffer()) {
+        SkCodecPrintf("Error: cannot initialize stream buffer.\n");
+        return SkCodec::kInvalidConversion;
     }
+
+    SkScaledCodec::ComputeSampleSize(dstInfo, this->getInfo(), &fSampleX, NULL);
+
+    return SkCodec::kSuccess;
 }
 
 /*
  * Performs the bitmap decoding for RLE input format
  * RLE decoding is performed all at once, rather than a one row at a time
  */
-SkCodec::Result SkBmpRLECodec::decode(const SkImageInfo& dstInfo,
-                                      void* dst, size_t dstRowBytes,
-                                      const Options& opts) {
+SkCodec::Result SkBmpRLECodec::decodeRows(const SkImageInfo& dstInfo,
+                                          void* dst, size_t dstRowBytes,
+                                          const Options& opts) {
     // Set RLE flags
     static const uint8_t RLE_ESCAPE = 0;
     static const uint8_t RLE_EOL = 0;
@@ -276,7 +292,7 @@
     static const uint8_t RLE_DELTA = 2;
 
     // Set constant values
-    const int width = dstInfo.width();
+    const int width = this->getInfo().width();
     const int height = dstInfo.height();
 
     // Destination parameters
@@ -288,9 +304,8 @@
     // Because of the need for transparent pixels, kN32 is the only color
     // type that makes sense for the destination format.
     SkASSERT(kN32_SkColorType == dstInfo.colorType());
-    if (kNo_ZeroInitialized == opts.fZeroInitialized) {
-        SkSwizzler::Fill(dst, dstInfo, dstRowBytes, height, SK_ColorTRANSPARENT, nullptr);
-    }
+    SkSwizzler::Fill(dst, dstInfo, dstRowBytes, height, SK_ColorTRANSPARENT,
+            NULL, opts.fZeroInitialized);
 
     while (true) {
         // If we have reached a row that is beyond the requested height, we have
diff --git a/src/codec/SkBmpRLECodec.h b/src/codec/SkBmpRLECodec.h
index 65b0180..aa5b061 100644
--- a/src/codec/SkBmpRLECodec.h
+++ b/src/codec/SkBmpRLECodec.h
@@ -35,9 +35,9 @@
      *                 after decoding the headers
      */
     SkBmpRLECodec(const SkImageInfo& srcInfo, SkStream* stream,
-                  uint16_t bitsPerPixel, uint32_t numColors,
-                  uint32_t bytesPerColor, uint32_t offset,
-                  SkBmpCodec::RowOrder rowOrder, size_t RLEBytes);
+            uint16_t bitsPerPixel, uint32_t numColors, uint32_t bytesPerColor,
+            uint32_t offset, SkScanlineDecoder::SkScanlineOrder rowOrder,
+            size_t RLEBytes);
 
 protected:
 
@@ -45,6 +45,10 @@
                        size_t dstRowBytes, const Options&, SkPMColor*,
                        int*) override;
 
+    SkCodec::Result prepareToDecode(const SkImageInfo& dstInfo,
+            const SkCodec::Options& options, SkPMColor inputColorPtr[],
+            int* inputColorCount) override;
+
 private:
 
     /*
@@ -77,11 +81,8 @@
                      const SkImageInfo& dstInfo, uint32_t x, uint32_t y,
                      uint8_t red, uint8_t green, uint8_t blue);
 
-    /*
-     * Performs the bitmap decoding for RLE input format
-     */
-    Result decode(const SkImageInfo& dstInfo, void* dst,
-                  size_t dstRowBytes, const Options& opts);
+    Result decodeRows(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes,
+                      const Options& opts) override;
 
     SkAutoTUnref<SkColorTable>          fColorTable;    // owned
     const uint32_t                      fNumColors;
@@ -90,6 +91,7 @@
     SkAutoTDeleteArray<uint8_t>         fStreamBuffer;
     size_t                              fRLEBytes;
     uint32_t                            fCurrRLEByte;
+    int                                 fSampleX;
 
     typedef SkBmpCodec INHERITED;
 };
diff --git a/src/codec/SkBmpStandardCodec.cpp b/src/codec/SkBmpStandardCodec.cpp
index 210498e..59bc917 100644
--- a/src/codec/SkBmpStandardCodec.cpp
+++ b/src/codec/SkBmpStandardCodec.cpp
@@ -18,14 +18,15 @@
 SkBmpStandardCodec::SkBmpStandardCodec(const SkImageInfo& info, SkStream* stream,
                                        uint16_t bitsPerPixel, uint32_t numColors,
                                        uint32_t bytesPerColor, uint32_t offset,
-                                       SkBmpCodec::RowOrder rowOrder, bool inIco)
+                                       SkScanlineDecoder::SkScanlineOrder rowOrder, bool inIco)
     : INHERITED(info, stream, bitsPerPixel, rowOrder)
     , fColorTable(nullptr)
     , fNumColors(this->computeNumColors(numColors))
     , fBytesPerColor(bytesPerColor)
     , fOffset(offset)
     , fSwizzler(nullptr)
-    , fSrcBuffer(nullptr)
+    , fSrcRowBytes(SkAlign4(compute_row_bytes(this->getInfo().width(), this->bitsPerPixel())))
+    , fSrcBuffer(new uint8_t [fSrcRowBytes])
     , fInIco(inIco)
 {}
 
@@ -53,23 +54,18 @@
         return kInvalidConversion;
     }
 
-    // Create the color table if necessary and prepare the stream for decode
-    // Note that if it is non-nullptr, inputColorCount will be modified
-    if (!this->createColorTable(dstInfo.alphaType(), inputColorCount)) {
-        SkCodecPrintf("Error: could not create color table.\n");
-        return kInvalidInput;
+    Result result = this->prepareToDecode(dstInfo, opts, inputColorPtr, inputColorCount);
+    if (kSuccess != result) {
+        return result;
     }
-
-    // Copy the color table to the client if necessary
-    copy_color_table(dstInfo, fColorTable, inputColorPtr, inputColorCount);
-
-    // Initialize a swizzler if necessary
-    if (!this->initializeSwizzler(dstInfo, opts)) {
-        SkCodecPrintf("Error: cannot initialize swizzler.\n");
-        return kInvalidConversion;
+    result = this->decodeRows(dstInfo, dst, dstRowBytes, opts);
+    if (kSuccess != result) {
+        return result;
     }
-
-    return this->decode(dstInfo, dst, dstRowBytes, opts);
+    if (fInIco) {
+        return this->decodeIcoMask(dstInfo, dst, dstRowBytes);
+    }
+    return kSuccess;
 }
 
 /*
@@ -169,10 +165,6 @@
 
 bool SkBmpStandardCodec::initializeSwizzler(const SkImageInfo& dstInfo,
                                             const Options& opts) {
-    // Allocate space for a row buffer
-    const size_t rowBytes = SkAlign4(compute_row_bytes(dstInfo.width(), this->bitsPerPixel()));
-    fSrcBuffer.reset(new uint8_t[rowBytes]);
-
     // Get swizzler configuration
     SkSwizzler::SrcConfig config;
     switch (this->bitsPerPixel()) {
@@ -216,122 +208,93 @@
     return true;
 }
 
-/*
- * Choose a fill for failures due to an incomplete image.  We will use zero as
- * the default palette index, black for opaque images, and transparent for
- * non-opaque images.
- */
-static uint32_t get_fill_color_or_index(uint16_t bitsPerPixels, SkAlphaType alphaType) {
-    uint32_t fillColorOrIndex;
-    switch (bitsPerPixels) {
-        case 1:
-        case 2:
-        case 4:
-        case 8:
-            fillColorOrIndex = 0;
-            break;
-        case 24:
-            fillColorOrIndex = SK_ColorBLACK;
-            break;
-        case 32:
-            if (kOpaque_SkAlphaType == alphaType) {
-                fillColorOrIndex = SK_ColorBLACK;
-            } else {
-                fillColorOrIndex = SK_ColorTRANSPARENT;
-            }
-            break;
-        default:
-            SkASSERT(false);
-            return 0;
+SkCodec::Result SkBmpStandardCodec::prepareToDecode(const SkImageInfo& dstInfo,
+        const SkCodec::Options& options, SkPMColor inputColorPtr[], int* inputColorCount) {
+    // Create the color table if necessary and prepare the stream for decode
+    // Note that if inputColorCount is non-NULL, the value it points to will be modified
+    if (!this->createColorTable(dstInfo.alphaType(), inputColorCount)) {
+        SkCodecPrintf("Error: could not create color table.\n");
+        return SkCodec::kInvalidInput;
     }
-    return fillColorOrIndex;
+
+    // Copy the color table to the client if necessary
+    copy_color_table(dstInfo, this->fColorTable, inputColorPtr, inputColorCount);
+
+    // Initialize a swizzler if necessary
+    if (!this->initializeSwizzler(dstInfo, options)) {
+        SkCodecPrintf("Error: cannot initialize swizzler.\n");
+        return SkCodec::kInvalidConversion;
+    }
+    return SkCodec::kSuccess;
 }
 
 /*
  * Performs the bitmap decoding for standard input format
  */
-SkCodec::Result SkBmpStandardCodec::decode(const SkImageInfo& dstInfo,
-                                   void* dst, size_t dstRowBytes,
-                                   const Options& opts) {
-    // Set constant values
-    const int width = dstInfo.width();
-    const int height = dstInfo.height();
-    const size_t rowBytes = SkAlign4(compute_row_bytes(width, this->bitsPerPixel()));
-
+SkCodec::Result SkBmpStandardCodec::decodeRows(const SkImageInfo& dstInfo,
+                                               void* dst, size_t dstRowBytes,
+                                               const Options& opts) {
     // Iterate over rows of the image
+    const int height = dstInfo.height();
     for (int y = 0; y < height; y++) {
         // Read a row of the input
-        if (this->stream()->read(fSrcBuffer.get(), rowBytes) != rowBytes) {
+        if (this->stream()->read(fSrcBuffer.get(), fSrcRowBytes) != fSrcRowBytes) {
             SkCodecPrintf("Warning: incomplete input stream.\n");
             // Fill the destination image on failure
-            // Get the fill color/index and check if it is 0
-            uint32_t fillColorOrIndex = get_fill_color_or_index(this->bitsPerPixel(),
-                    dstInfo.alphaType());
-            bool zeroFill = (0 == fillColorOrIndex);
-
-            if (kNo_ZeroInitialized == opts.fZeroInitialized || !zeroFill) {
-                // Get a pointer to the color table if it exists
-                const SkPMColor* colorPtr = get_color_ptr(fColorTable.get());
-
-                void* dstStart = this->getDstStartRow(dst, dstRowBytes, y);
-                SkSwizzler::Fill(dstStart, dstInfo, dstRowBytes, dstInfo.height() - y,
-                        fillColorOrIndex, colorPtr);
-            }
+            void* dstStart = this->getDstStartRow(dst, dstRowBytes, y);
+            const SkPMColor* colorPtr = get_color_ptr(fColorTable.get());
+            uint32_t fillColorOrIndex = get_fill_color_or_index(dstInfo.alphaType());
+            SkSwizzler::Fill(dstStart, dstInfo, dstRowBytes, dstInfo.height() - y,
+                    fillColorOrIndex, colorPtr, opts.fZeroInitialized);
             return kIncompleteInput;
         }
 
         // Decode the row in destination format
-        uint32_t row;
-        if (SkBmpCodec::kTopDown_RowOrder == this->rowOrder()) {
-            row = y;
-        } else {
-            row = height - 1 - y;
-        }
+        uint32_t row = this->getDstRow(y, dstInfo.height());
 
         void* dstRow = SkTAddOffset<void>(dst, row * dstRowBytes);
         fSwizzler->swizzle(dstRow, fSrcBuffer.get());
     }
 
-    // Finally, apply the AND mask for bmp-in-ico images
-    if (fInIco) {
-        // BMP in ICO have transparency, so this cannot be 565, and this mask
-        // prevents us from using kIndex8. The below code depends on the output
-        // being an SkPMColor.
-        SkASSERT(dstInfo.colorType() == kN32_SkColorType);
+    // Finished decoding the entire image
+    return kSuccess;
+}
 
-        // The AND mask is always 1 bit per pixel
-        const size_t rowBytes = SkAlign4(compute_row_bytes(width, 1));
+// TODO (msarett): This function will need to be modified in order to perform row by row decodes
+//                 when the Ico scanline decoder is implemented.
+SkCodec::Result SkBmpStandardCodec::decodeIcoMask(const SkImageInfo& dstInfo,
+        void* dst, size_t dstRowBytes) {
+    // BMP in ICO have transparency, so this cannot be 565, and this mask
+    // prevents us from using kIndex8. The below code depends on the output
+    // being an SkPMColor.
+    SkASSERT(dstInfo.colorType() == kN32_SkColorType);
 
-        SkPMColor* dstPtr = (SkPMColor*) dst;
-        for (int y = 0; y < height; y++) {
-            // The srcBuffer will at least be large enough
-            if (stream()->read(fSrcBuffer.get(), rowBytes) != rowBytes) {
-                SkCodecPrintf("Warning: incomplete AND mask for bmp-in-ico.\n");
-                return kIncompleteInput;
-            }
+    // The AND mask is always 1 bit per pixel
+    const int width = this->getInfo().width();
+    const size_t rowBytes = SkAlign4(compute_row_bytes(width, 1));
 
-            int row;
-            if (SkBmpCodec::kBottomUp_RowOrder == this->rowOrder()) {
-                row = height - y - 1;
-            } else {
-                row = y;
-            }
+    SkPMColor* dstPtr = (SkPMColor*) dst;
+    for (int y = 0; y < dstInfo.height(); y++) {
+        // The srcBuffer will at least be large enough
+        if (stream()->read(fSrcBuffer.get(), rowBytes) != rowBytes) {
+            SkCodecPrintf("Warning: incomplete AND mask for bmp-in-ico.\n");
+            return kIncompleteInput;
+        }
 
-            SkPMColor* dstRow =
-                    SkTAddOffset<SkPMColor>(dstPtr, row * dstRowBytes);
+        int row = this->getDstRow(y, dstInfo.height());
 
-            for (int x = 0; x < width; x++) {
-                int quotient;
-                int modulus;
-                SkTDivMod(x, 8, &quotient, &modulus);
-                uint32_t shift = 7 - modulus;
-                uint32_t alphaBit =
-                        (fSrcBuffer.get()[quotient] >> shift) & 0x1;
-                dstRow[x] &= alphaBit - 1;
-            }
+        SkPMColor* dstRow =
+                SkTAddOffset<SkPMColor>(dstPtr, row * dstRowBytes);
+
+        for (int x = 0; x < width; x++) {
+            int quotient;
+            int modulus;
+            SkTDivMod(x, 8, &quotient, &modulus);
+            uint32_t shift = 7 - modulus;
+            uint32_t alphaBit =
+                    (fSrcBuffer.get()[quotient] >> shift) & 0x1;
+            dstRow[x] &= alphaBit - 1;
         }
     }
-
-    // Finished decoding the entire image
     return kSuccess;
 }
diff --git a/src/codec/SkBmpStandardCodec.h b/src/codec/SkBmpStandardCodec.h
index a7d48c8..fcc246f 100644
--- a/src/codec/SkBmpStandardCodec.h
+++ b/src/codec/SkBmpStandardCodec.h
@@ -36,8 +36,9 @@
      * @param rowOrder indicates whether rows are ordered top-down or bottom-up
      */
     SkBmpStandardCodec(const SkImageInfo& srcInfo, SkStream* stream,
-               uint16_t bitsPerPixel, uint32_t numColors, uint32_t bytesPerColor,
-               uint32_t offset, SkBmpCodec::RowOrder rowOrder, bool isIco);
+            uint16_t bitsPerPixel, uint32_t numColors, uint32_t bytesPerColor,
+            uint32_t offset, SkScanlineDecoder::SkScanlineOrder rowOrder,
+            bool isIco);
 
 protected:
 
@@ -48,6 +49,11 @@
     bool onInIco() const override {
         return fInIco;
     }
+
+    SkCodec::Result prepareToDecode(const SkImageInfo& dstInfo,
+            const SkCodec::Options& options, SkPMColor inputColorPtr[],
+            int* inputColorCount) override;
+
 private:
 
     /*
@@ -58,13 +64,17 @@
 
     bool initializeSwizzler(const SkImageInfo& dstInfo, const Options& opts);
 
-    Result decode(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes, const Options& opts);
+    Result decodeRows(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes,
+                      const Options& opts) override;
+
+    Result decodeIcoMask(const SkImageInfo& dstInfo, void* dst, size_t dstRowBytes);
 
     SkAutoTUnref<SkColorTable>          fColorTable;     // owned
     const uint32_t                      fNumColors;
     const uint32_t                      fBytesPerColor;
     const uint32_t                      fOffset;
     SkAutoTDelete<SkSwizzler>           fSwizzler;
+    const size_t                        fSrcRowBytes;
     SkAutoTDeleteArray<uint8_t>         fSrcBuffer;
     const bool                          fInIco;
 
diff --git a/src/codec/SkCodecPriv.h b/src/codec/SkCodecPriv.h
index 9a28cfd..7260741 100644
--- a/src/codec/SkCodecPriv.h
+++ b/src/codec/SkCodecPriv.h
@@ -30,6 +30,45 @@
 #define COMPUTE_RESULT_ALPHA                    \
     SkSwizzler::GetResult(zeroAlpha, maxAlpha);
 
+/*
+ * Returns the first coordinate that we will keep during a scaled decode.
+ * The output can be interpreted as an x-coordinate or a y-coordinate.
+ *
+ * This does not need to be called and is not called when sampleFactor == 1.
+ */
+static int get_start_coord(int sampleFactor) { return sampleFactor / 2; };
+
+/*
+ * Given a coordinate in the original image, this returns the corresponding
+ * coordinate in the scaled image.  This function is meaningless if
+ * is_coord_necessary returns false.
+ * The output can be interpreted as an x-coordinate or a y-coordinate.
+ *
+ * This does not need to be called and is not called when sampleFactor == 1.
+ */
+static int get_dst_coord(int srcCoord, int sampleFactor) { return srcCoord / sampleFactor; };
+
+/*
+ * When scaling, we will discard certain y-coordinates (rows) and
+ * x-coordinates (columns).  This function returns true if we should keep the
+ * coordinate and false otherwise.
+ * The inputs may be x-coordinates or y-coordinates.
+ *
+ * This does not need to be called and is not called when sampleFactor == 1.
+ */
+static bool is_coord_necessary(int srcCoord, int sampleFactor, int scaledDim) {
+    // Get the first coordinate that we want to keep
+    int startCoord = get_start_coord(sampleFactor);
+
+    // Return false on edge cases
+    if (srcCoord < startCoord || get_dst_coord(srcCoord, sampleFactor) >= scaledDim) {
+        return false;
+    }
+
+    // Starting at startCoord, every sampleFactor-th coordinate is necessary
+    return ((srcCoord - startCoord) % sampleFactor) == 0;
+}
+
 static inline bool valid_alpha(SkAlphaType dstAlpha, SkAlphaType srcAlpha) {
     // Check for supported alpha types
     if (srcAlpha != dstAlpha) {
@@ -132,6 +171,23 @@
 }
 
 /*
+ * On incomplete images, get the color to fill with
+ */
+static inline SkPMColor get_fill_color_or_index(SkAlphaType alphaType) {
+    // This condition works properly for all supported output color types.
+    // kIndex8: The low 8 bits of both possible return values are 0, which is
+    //          our desired default index.
+    // kGray8:  The low 8 bits of both possible return values are 0, which is
+    //          black, our desired fill value.
+    // kRGB565: The low 16 bits of both possible return values are 0, which is
+    //          black, our desired fill value.
+    // kN32:    Return black for opaque images and transparent for non-opaque
+    //          images.
+    return kOpaque_SkAlphaType == alphaType ?
+            SK_ColorBLACK : SK_ColorTRANSPARENT;
+}
+
+/*
  * Get a byte from a buffer
  * This method is unsafe, the caller is responsible for performing a check
  */
diff --git a/src/codec/SkCodec_libgif.cpp b/src/codec/SkCodec_libgif.cpp
index 3e78f23..5f1bec0 100644
--- a/src/codec/SkCodec_libgif.cpp
+++ b/src/codec/SkCodec_libgif.cpp
@@ -382,14 +382,6 @@
                     }
                 }
 
-                // Check if we can skip filling the background of the image.  We
-                // may be able to if the memory is zero initialized.
-                bool skipBackground =
-                        ((kN32_SkColorType == dstColorType && colorTable[fillIndex] == 0) ||
-                        (kIndex_8_SkColorType == dstColorType && fillIndex == 0)) &&
-                        kYes_ZeroInitialized == zeroInit;
-
-
                 // Fill in the color table for indices greater than color count.
                 // This allows for predictable, safe behavior.
                 for (uint32_t i = colorCount; i < maxColors; i++) {
@@ -407,9 +399,8 @@
                     // FIXME: This may not be the behavior that we want for
                     //        animated gifs where we draw on top of the
                     //        previous frame.
-                    if (!skipBackground) {
-                        SkSwizzler::Fill(dst, dstInfo, dstRowBytes, height, fillIndex, colorTable);
-                    }
+                    SkSwizzler::Fill(dst, dstInfo, dstRowBytes, height, fillIndex, colorTable,
+                            zeroInit);
 
                     // Modify the dst pointer
                     const int32_t dstBytesPerPixel = SkColorTypeBytesPerPixel(dstColorType);
@@ -436,13 +427,11 @@
                     for (int32_t y = 0; y < innerHeight; y++) {
                         if (GIF_ERROR == DGifGetLine(fGif, buffer.get(), innerWidth)) {
                             // Recover from error by filling remainder of image
-                            if (!skipBackground) {
-                                memset(buffer.get(), fillIndex, innerWidth);
-                                for (; y < innerHeight; y++) {
-                                    void* dstRow = SkTAddOffset<void>(dst, dstRowBytes *
-                                            get_output_row_interlaced(y, innerHeight));
-                                    swizzler->swizzle(dstRow, buffer.get());
-                                }
+                            memset(buffer.get(), fillIndex, innerWidth);
+                            for (; y < innerHeight; y++) {
+                                void* dstRow = SkTAddOffset<void>(dst, dstRowBytes *
+                                        get_output_row_interlaced(y, innerHeight));
+                                swizzler->swizzle(dstRow, buffer.get());
                             }
                             return gif_error(SkStringPrintf(
                                     "Could not decode line %d of %d.\n",
@@ -457,10 +446,8 @@
                     void* dstRow = dst;
                     for (int32_t y = 0; y < innerHeight; y++) {
                         if (GIF_ERROR == DGifGetLine(fGif, buffer.get(), innerWidth)) {
-                            if (!skipBackground) {
-                                SkSwizzler::Fill(dstRow, dstInfo, dstRowBytes,
-                                                 innerHeight - y, fillIndex, colorTable);
-                            }
+                            SkSwizzler::Fill(dstRow, dstInfo, dstRowBytes, innerHeight - y,
+                                    fillIndex, colorTable, zeroInit);
                             return gif_error(SkStringPrintf(
                                     "Could not decode line %d of %d.\n",
                                     y, height - 1).c_str(), kIncompleteInput);
diff --git a/src/codec/SkCodec_libpng.cpp b/src/codec/SkCodec_libpng.cpp
index 2e54342..2ba2d5b 100644
--- a/src/codec/SkCodec_libpng.cpp
+++ b/src/codec/SkCodec_libpng.cpp
@@ -739,8 +739,8 @@
 
     bool onReallyHasAlpha() const override { return fHasAlpha; }
 
-    bool onRequiresPostYSampling() override {
-        return true;
+    SkScanlineOrder onGetScanlineOrder() const override {
+        return kNone_SkScanlineOrder;
     }
 
     SkEncodedFormat onGetEncodedFormat() const override { 
diff --git a/src/codec/SkJpegCodec.cpp b/src/codec/SkJpegCodec.cpp
index ed3944c..d985337 100644
--- a/src/codec/SkJpegCodec.cpp
+++ b/src/codec/SkJpegCodec.cpp
@@ -355,11 +355,8 @@
             // If the destination is kRGB_565, the low 16 bits of SK_ColorBLACK
             // will be used.  Conveniently, these are zeros, which is the
             // representation for black in kRGB_565.
-            if (kNo_ZeroInitialized == options.fZeroInitialized ||
-                    kN32_SkColorType == dstInfo.colorType()) {
-                SkSwizzler::Fill(dstRow, dstInfo, dstRowBytes, dstHeight - y,
-                        SK_ColorBLACK, nullptr);
-            }
+            SkSwizzler::Fill(dstRow, dstInfo, dstRowBytes, dstHeight - y,
+                    SK_ColorBLACK, nullptr, options.fZeroInitialized);
 
             // Prevent libjpeg from failing on incomplete decode
             dinfo->output_scanline = dstHeight;
@@ -516,11 +513,8 @@
             uint32_t rowsDecoded =
                     chromium_jpeg_read_scanlines(fCodec->fDecoderMgr->dinfo(), &dstRow, 1);
             if (rowsDecoded != 1) {
-                if (SkCodec::kNo_ZeroInitialized == fOpts.fZeroInitialized ||
-                        kN32_SkColorType == this->dstInfo().colorType()) {
-                    SkSwizzler::Fill(dstRow, this->dstInfo(), rowBytes,
-                            count - y, SK_ColorBLACK, nullptr);
-                }
+                SkSwizzler::Fill(dstRow, this->dstInfo(), rowBytes, count - y,
+                        SK_ColorBLACK, nullptr, fOpts.fZeroInitialized);
                 fCodec->fDecoderMgr->dinfo()->output_scanline = this->dstInfo().height();
                 return SkCodec::kIncompleteInput;
             }
diff --git a/src/codec/SkMaskSwizzler.cpp b/src/codec/SkMaskSwizzler.cpp
index 58be0c6..6ca9b58 100644
--- a/src/codec/SkMaskSwizzler.cpp
+++ b/src/codec/SkMaskSwizzler.cpp
@@ -8,57 +8,64 @@
 #include "SkCodecPriv.h"
 #include "SkColorPriv.h"
 #include "SkMaskSwizzler.h"
+#include "SkScaledCodec.h"
 
 static SkSwizzler::ResultAlpha swizzle_mask16_to_n32_opaque(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {
 
     // Use the masks to decode to the destination
-    uint16_t* srcPtr = (uint16_t*) srcRow;
+    uint16_t* srcPtr = ((uint16_t*) srcRow) + startX;
     SkPMColor* dstPtr = (SkPMColor*) dstRow;
     for (int i = 0; i < width; i++) {
-        uint16_t p = srcPtr[i];
+        uint16_t p = srcPtr[0];
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         dstPtr[i] = SkPackARGB32NoCheck(0xFF, red, green, blue);
+        srcPtr += sampleX;
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
 
 static SkSwizzler::ResultAlpha swizzle_mask16_to_n32_unpremul(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {
 
     // Use the masks to decode to the destination
-    uint16_t* srcPtr = (uint16_t*) srcRow;
+    uint16_t* srcPtr = ((uint16_t*) srcRow) + startX;
     SkPMColor* dstPtr = (SkPMColor*) dstRow;
     INIT_RESULT_ALPHA;
     for (int i = 0; i < width; i++) {
-        uint16_t p = srcPtr[i];
+        uint16_t p = srcPtr[0];
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         uint8_t alpha = masks->getAlpha(p);
         UPDATE_RESULT_ALPHA(alpha);
         dstPtr[i] = SkPackARGB32NoCheck(alpha, red, green, blue);
+        srcPtr += sampleX;
     }
     return COMPUTE_RESULT_ALPHA;
 }
 
 static SkSwizzler::ResultAlpha swizzle_mask16_to_n32_premul(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {
 
     // Use the masks to decode to the destination
-    uint16_t* srcPtr = (uint16_t*) srcRow;
+    uint16_t* srcPtr = ((uint16_t*) srcRow) + startX;
     SkPMColor* dstPtr = (SkPMColor*) dstRow;
     INIT_RESULT_ALPHA;
     for (int i = 0; i < width; i++) {
-        uint16_t p = srcPtr[i];
+        uint16_t p = srcPtr[0];
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         uint8_t alpha = masks->getAlpha(p);
         UPDATE_RESULT_ALPHA(alpha);
         dstPtr[i] = SkPreMultiplyARGB(alpha, red, green, blue);
+        srcPtr += sampleX;
     }
     return COMPUTE_RESULT_ALPHA;
 }
@@ -66,152 +73,174 @@
 // TODO (msarett): We have promoted a two byte per pixel image to 8888, only to
 // convert it back to 565. Instead, we should swizzle to 565 directly.
 static SkSwizzler::ResultAlpha swizzle_mask16_to_565(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
 
     // Use the masks to decode to the destination
-    uint16_t* srcPtr = (uint16_t*) srcRow;
+    uint16_t* srcPtr = ((uint16_t*) srcRow) + startX;  // offset counted in 16-bit pixels
     uint16_t* dstPtr = (uint16_t*) dstRow;
     for (int i = 0; i < width; i++) {
-        uint16_t p = srcPtr[i];
+        uint16_t p = srcPtr[0];  // read at the cursor rather than by index
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         dstPtr[i] = SkPack888ToRGB16(red, green, blue);
+        srcPtr += sampleX;  // move the cursor to the next sampled pixel
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
 
 static SkSwizzler::ResultAlpha swizzle_mask24_to_n32_opaque(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
 
     // Use the masks to decode to the destination
+    srcRow += 3 * startX;  // each source pixel occupies 3 bytes
     SkPMColor* dstPtr = (SkPMColor*) dstRow;
-    for (int i = 0; i < 3*width; i += 3) {
-        uint32_t p = srcRow[i] | (srcRow[i + 1] << 8) | srcRow[i + 2] << 16;
+    for (int i = 0; i < width; i++) {  // i now counts output pixels, not source bytes
+        uint32_t p = srcRow[0] | (srcRow[1] << 8) | srcRow[2] << 16;  // lowest byte first
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
-        dstPtr[i/3] = SkPackARGB32NoCheck(0xFF, red, green, blue);
+        dstPtr[i] = SkPackARGB32NoCheck(0xFF, red, green, blue);  // alpha forced to 0xFF
+        srcRow += 3 * sampleX;  // advance sampleX pixels of 3 bytes each
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
 
 static SkSwizzler::ResultAlpha swizzle_mask24_to_n32_unpremul(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
 
     // Use the masks to decode to the destination
-    SkPMColor* dstPtr = (SkPMColor*) dstRow;
-    INIT_RESULT_ALPHA;
-    for (int i = 0; i < 3*width; i += 3) {
-        uint32_t p = srcRow[i] | (srcRow[i + 1] << 8) | srcRow[i + 2] << 16;
-        uint8_t red = masks->getRed(p);
-        uint8_t green = masks->getGreen(p);
-        uint8_t blue = masks->getBlue(p);
-        uint8_t alpha = masks->getAlpha(p);
-        UPDATE_RESULT_ALPHA(alpha);
-        dstPtr[i/3] = SkPackARGB32NoCheck(alpha, red, green, blue);
-    }
-    return COMPUTE_RESULT_ALPHA;
-}
-
-static SkSwizzler::ResultAlpha swizzle_mask24_to_n32_premul(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
-
-    // Use the masks to decode to the destination
-    SkPMColor* dstPtr = (SkPMColor*) dstRow;
-    INIT_RESULT_ALPHA;
-    for (int i = 0; i < 3*width; i += 3) {
-        uint32_t p = srcRow[i] | (srcRow[i + 1] << 8) | srcRow[i + 2] << 16;
-        uint8_t red = masks->getRed(p);
-        uint8_t green = masks->getGreen(p);
-        uint8_t blue = masks->getBlue(p);
-        uint8_t alpha = masks->getAlpha(p);
-        UPDATE_RESULT_ALPHA(alpha);
-        dstPtr[i/3] = SkPreMultiplyARGB(alpha, red, green, blue);
-    }
-    return COMPUTE_RESULT_ALPHA;
-}
-
-static SkSwizzler::ResultAlpha swizzle_mask24_to_565(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
-
-    // Use the masks to decode to the destination
-    uint16_t* dstPtr = (uint16_t*) dstRow;
-    for (int i = 0; i < 3*width; i += 3) {
-        uint32_t p = srcRow[i] | (srcRow[i + 1] << 8) | srcRow[i + 2] << 16;
-        uint8_t red = masks->getRed(p);
-        uint8_t green = masks->getGreen(p);
-        uint8_t blue = masks->getBlue(p);
-        dstPtr[i/3] = SkPack888ToRGB16(red, green, blue);
-    }
-    return SkSwizzler::kOpaque_ResultAlpha;
-}
-
-static SkSwizzler::ResultAlpha swizzle_mask32_to_n32_opaque(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
-
-    // Use the masks to decode to the destination
-    uint32_t* srcPtr = (uint32_t*) srcRow;
-    SkPMColor* dstPtr = (SkPMColor*) dstRow;
-    for (int i = 0; i < width; i++) {
-        uint32_t p = srcPtr[i];
-        uint8_t red = masks->getRed(p);
-        uint8_t green = masks->getGreen(p);
-        uint8_t blue = masks->getBlue(p);
-        dstPtr[i] = SkPackARGB32NoCheck(0xFF, red, green, blue);
-    }
-    return SkSwizzler::kOpaque_ResultAlpha;
-}
-
-static SkSwizzler::ResultAlpha swizzle_mask32_to_n32_unpremul(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
-
-    // Use the masks to decode to the destination
-    uint32_t* srcPtr = (uint32_t*) srcRow;
+    srcRow += 3 * startX;  // each source pixel occupies 3 bytes
     SkPMColor* dstPtr = (SkPMColor*) dstRow;
     INIT_RESULT_ALPHA;
     for (int i = 0; i < width; i++) {
-        uint32_t p = srcPtr[i];
+        uint32_t p = srcRow[0] | (srcRow[1] << 8) | srcRow[2] << 16;  // lowest byte first
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         uint8_t alpha = masks->getAlpha(p);
         UPDATE_RESULT_ALPHA(alpha);
         dstPtr[i] = SkPackARGB32NoCheck(alpha, red, green, blue);
+        srcRow += 3 * sampleX;  // advance sampleX pixels of 3 bytes each
     }
     return COMPUTE_RESULT_ALPHA;
 }
 
-static SkSwizzler::ResultAlpha swizzle_mask32_to_n32_premul(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+static SkSwizzler::ResultAlpha swizzle_mask24_to_n32_premul(
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
 
     // Use the masks to decode to the destination
-    uint32_t* srcPtr = (uint32_t*) srcRow;
+    srcRow += 3 * startX;  // each source pixel occupies 3 bytes
     SkPMColor* dstPtr = (SkPMColor*) dstRow;
     INIT_RESULT_ALPHA;
     for (int i = 0; i < width; i++) {
-        uint32_t p = srcPtr[i];
+        uint32_t p = srcRow[0] | (srcRow[1] << 8) | srcRow[2] << 16;  // lowest byte first
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         uint8_t alpha = masks->getAlpha(p);
         UPDATE_RESULT_ALPHA(alpha);
         dstPtr[i] = SkPreMultiplyARGB(alpha, red, green, blue);
+        srcRow += 3 * sampleX;  // advance sampleX pixels of 3 bytes each
+    }
+    return COMPUTE_RESULT_ALPHA;
+}
+
+static SkSwizzler::ResultAlpha swizzle_mask24_to_565(
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
+
+    // Use the masks to decode to the destination
+    srcRow += 3 * startX;  // each source pixel occupies 3 bytes
+    uint16_t* dstPtr = (uint16_t*) dstRow;
+    for (int i = 0; i < width; i++) {  // one iteration per output pixel
+        uint32_t p = srcRow[0] | (srcRow[1] << 8) | srcRow[2] << 16;  // lowest byte first
+        uint8_t red = masks->getRed(p);
+        uint8_t green = masks->getGreen(p);
+        uint8_t blue = masks->getBlue(p);
+        dstPtr[i] = SkPack888ToRGB16(red, green, blue);
+        srcRow += 3 * sampleX;  // advance sampleX pixels of 3 bytes each
+    }
+    return SkSwizzler::kOpaque_ResultAlpha;
+}
+
+static SkSwizzler::ResultAlpha swizzle_mask32_to_n32_opaque(
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
+
+    // Use the masks to decode to the destination
+    uint32_t* srcPtr = ((uint32_t*) srcRow) + startX;  // offset counted in 32-bit pixels
+    SkPMColor* dstPtr = (SkPMColor*) dstRow;
+    for (int i = 0; i < width; i++) {  // one iteration per output pixel
+        uint32_t p = srcPtr[0];  // read at the cursor; it is advanced below
+        uint8_t red = masks->getRed(p);
+        uint8_t green = masks->getGreen(p);
+        uint8_t blue = masks->getBlue(p);
+        dstPtr[i] = SkPackARGB32NoCheck(0xFF, red, green, blue);  // alpha forced to 0xFF
+        srcPtr += sampleX;  // jump to the next sampled source pixel
+    }
+    return SkSwizzler::kOpaque_ResultAlpha;
+}
+
+static SkSwizzler::ResultAlpha swizzle_mask32_to_n32_unpremul(
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
+
+    // Use the masks to decode to the destination
+    uint32_t* srcPtr = ((uint32_t*) srcRow) + startX;  // offset counted in 32-bit pixels
+    SkPMColor* dstPtr = (SkPMColor*) dstRow;
+    INIT_RESULT_ALPHA;
+    for (int i = 0; i < width; i++) {  // one iteration per output pixel
+        uint32_t p = srcPtr[0];  // read at the cursor; it is advanced below
+        uint8_t red = masks->getRed(p);
+        uint8_t green = masks->getGreen(p);
+        uint8_t blue = masks->getBlue(p);
+        uint8_t alpha = masks->getAlpha(p);
+        UPDATE_RESULT_ALPHA(alpha);
+        dstPtr[i] = SkPackARGB32NoCheck(alpha, red, green, blue);  // color channels stored as decoded
+        srcPtr += sampleX;  // jump to the next sampled source pixel
+    }
+    return COMPUTE_RESULT_ALPHA;
+}
+
+static SkSwizzler::ResultAlpha swizzle_mask32_to_n32_premul(
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
+
+    // Use the masks to decode to the destination
+    uint32_t* srcPtr = ((uint32_t*) srcRow) + startX;  // offset counted in 32-bit pixels
+    SkPMColor* dstPtr = (SkPMColor*) dstRow;
+    INIT_RESULT_ALPHA;
+    for (int i = 0; i < width; i++) {  // one iteration per output pixel
+        uint32_t p = srcPtr[0];  // read at the cursor; it is advanced below
+        uint8_t red = masks->getRed(p);
+        uint8_t green = masks->getGreen(p);
+        uint8_t blue = masks->getBlue(p);
+        uint8_t alpha = masks->getAlpha(p);
+        UPDATE_RESULT_ALPHA(alpha);
+        dstPtr[i] = SkPreMultiplyARGB(alpha, red, green, blue);
+        srcPtr += sampleX;  // jump to the next sampled source pixel
     }
     return COMPUTE_RESULT_ALPHA;
 }
 
 static SkSwizzler::ResultAlpha swizzle_mask32_to_565(
-        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks) {
+        void* dstRow, const uint8_t* srcRow, int width, SkMasks* masks,
+        uint32_t startX, uint32_t sampleX) {  // startX: first source pixel; sampleX: source step
     // Use the masks to decode to the destination
-    uint32_t* srcPtr = (uint32_t*) srcRow;
+    uint32_t* srcPtr = ((uint32_t*) srcRow) + startX;  // offset counted in 32-bit pixels
     uint16_t* dstPtr = (uint16_t*) dstRow;
     for (int i = 0; i < width; i++) {
-        uint32_t p = srcPtr[i];
+        uint32_t p = srcPtr[0];  // read at the cursor rather than by index
         uint8_t red = masks->getRed(p);
         uint8_t green = masks->getGreen(p);
         uint8_t blue = masks->getBlue(p);
         dstPtr[i] = SkPack888ToRGB16(red, green, blue);
+        srcPtr += sampleX;  // move the cursor to the next sampled pixel
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
@@ -222,15 +251,16 @@
  *
  */
 SkMaskSwizzler* SkMaskSwizzler::CreateMaskSwizzler(
-        const SkImageInfo& info, SkMasks* masks, uint32_t bitsPerPixel) {
+        const SkImageInfo& dstInfo, const SkImageInfo& srcInfo, SkMasks* masks,
+        uint32_t bitsPerPixel) {
 
     // Choose the appropriate row procedure
     RowProc proc = nullptr;
     switch (bitsPerPixel) {
         case 16:
-            switch (info.colorType()) {
+            switch (dstInfo.colorType()) {
                 case kN32_SkColorType:
-                    switch (info.alphaType()) {
+                    switch (dstInfo.alphaType()) {
                         case kUnpremul_SkAlphaType:
                             proc = &swizzle_mask16_to_n32_unpremul;
                             break;
@@ -245,7 +275,7 @@
                     }
                     break;
                 case kRGB_565_SkColorType:
-                    switch (info.alphaType()) {
+                    switch (dstInfo.alphaType()) {
                         case kOpaque_SkAlphaType:
                             proc = &swizzle_mask16_to_565;
                             break;
@@ -258,9 +288,9 @@
             }
             break;
         case 24:
-            switch (info.colorType()) {
+            switch (dstInfo.colorType()) {
                 case kN32_SkColorType:
-                    switch (info.alphaType()) {
+                    switch (dstInfo.alphaType()) {
                         case kUnpremul_SkAlphaType:
                             proc = &swizzle_mask24_to_n32_unpremul;
                             break;
@@ -275,7 +305,7 @@
                     }
                     break;
                 case kRGB_565_SkColorType:
-                    switch (info.alphaType()) {
+                    switch (dstInfo.alphaType()) {
                         case kOpaque_SkAlphaType:
                             proc = &swizzle_mask24_to_565;
                             break;
@@ -288,9 +318,9 @@
             }
             break;
         case 32:
-            switch (info.colorType()) {
+            switch (dstInfo.colorType()) {
                 case kN32_SkColorType:
-                    switch (info.alphaType()) {
+                    switch (dstInfo.alphaType()) {
                         case kUnpremul_SkAlphaType:
                             proc = &swizzle_mask32_to_n32_unpremul;
                             break;
@@ -305,7 +335,7 @@
                     }
                     break;
                 case kRGB_565_SkColorType:
-                    switch (info.alphaType()) {
+                    switch (dstInfo.alphaType()) {
                         case kOpaque_SkAlphaType:
                             proc = &swizzle_mask32_to_565;
                             break;
@@ -321,7 +351,12 @@
             SkASSERT(false);
             return nullptr;
     }
-    return new SkMaskSwizzler(info, masks, proc);
+
+    // Get the sample size
+    int sampleX;
+    SkScaledCodec::ComputeSampleSize(dstInfo, srcInfo, &sampleX, NULL);
+
+    return new SkMaskSwizzler(dstInfo, masks, proc, sampleX);
 }
 
 /*
@@ -330,10 +365,12 @@
  *
  */
 SkMaskSwizzler::SkMaskSwizzler(const SkImageInfo& dstInfo, SkMasks* masks,
-                               RowProc proc)
+                               RowProc proc, uint32_t sampleX)
     : fDstInfo(dstInfo)
     , fMasks(masks)
     , fRowProc(proc)
+    , fSampleX(sampleX)  // stride later handed to fRowProc on every swizzle() call
+    , fStartX(get_start_coord(sampleX))  // starting source column, derived once from the stride
 {}
 
 /*
@@ -343,5 +380,5 @@
  */
 SkSwizzler::ResultAlpha SkMaskSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) {
     SkASSERT(nullptr != dst && nullptr != src);
-    return fRowProc(dst, src, fDstInfo.width(), fMasks);
+    return fRowProc(dst, src, fDstInfo.width(), fMasks, fStartX, fSampleX);  // adds start + stride
 }
diff --git a/src/codec/SkMaskSwizzler.h b/src/codec/SkMaskSwizzler.h
index 9f4dd44..794dcd1 100644
--- a/src/codec/SkMaskSwizzler.h
+++ b/src/codec/SkMaskSwizzler.h
@@ -24,7 +24,8 @@
      * Create a new swizzler
      * @param masks Unowned pointer to helper class
      */
-    static SkMaskSwizzler* CreateMaskSwizzler(const SkImageInfo& imageInfo,
+    static SkMaskSwizzler* CreateMaskSwizzler(const SkImageInfo& dstInfo,
+                                              const SkImageInfo& srcInfo,
                                               SkMasks* masks,
                                               uint32_t bitsPerPixel);
 
@@ -40,17 +41,20 @@
      */
     typedef SkSwizzler::ResultAlpha (*RowProc)(
             void* dstRow, const uint8_t* srcRow, int width,
-            SkMasks* masks);
+            SkMasks* masks, uint32_t startX, uint32_t sampleX);
 
     /*
      * Constructor for mask swizzler
      */
-    SkMaskSwizzler(const SkImageInfo& info, SkMasks* masks, RowProc proc);
+    SkMaskSwizzler(const SkImageInfo& info, SkMasks* masks, RowProc proc,
+            uint32_t sampleX);
 
     // Fields
     const SkImageInfo& fDstInfo;
     SkMasks*           fMasks;       // unowned
     const RowProc      fRowProc;
+    const uint32_t     fSampleX;
+    const uint32_t     fStartX;
 };
 
 #endif
diff --git a/src/codec/SkScaledCodec.cpp b/src/codec/SkScaledCodec.cpp
index 7ff279c..a9e067e 100644
--- a/src/codec/SkScaledCodec.cpp
+++ b/src/codec/SkScaledCodec.cpp
@@ -198,7 +198,6 @@
     if (kSuccess == result) {
         // native decode supported
         return fScanlineDecoder->getScanlines(dst, requestedInfo.height(), rowBytes);
-
     }
 
     if (kInvalidScale != result) {
@@ -213,7 +212,7 @@
         return kInvalidScale;
     }
     // set first sample pixel in y direction
-    int Y0 = sampleY >> 1;
+    int Y0 = get_start_coord(sampleY);
 
     int dstHeight = requestedInfo.height();
     int srcHeight = fScanlineDecoder->getInfo().height();
@@ -227,41 +226,64 @@
     if (kSuccess != result) {
         return result;
     }
-    
-    const bool requiresPostYSampling = fScanlineDecoder->requiresPostYSampling();
 
-    if (requiresPostYSampling) {
-        SkAutoMalloc storage(srcHeight * rowBytes);
-        uint8_t* storagePtr = static_cast<uint8_t*>(storage.get());
-        result = fScanlineDecoder->getScanlines(storagePtr, srcHeight, rowBytes);
-        if (kSuccess != result) {
-            return result;
+    switch(fScanlineDecoder->getScanlineOrder()) {
+        case SkScanlineDecoder::kTopDown_SkScanlineOrder: {
+            result = fScanlineDecoder->skipScanlines(Y0);
+            if (kSuccess != result && kIncompleteInput != result) {
+                return result;
+            }
+            for (int y = 0; y < dstHeight; y++) {
+                result = fScanlineDecoder->getScanlines(dst, 1, rowBytes);
+                if (kSuccess != result && kIncompleteInput != result) {
+                    return result;
+                }
+                if (y < dstHeight - 1) {
+                    result = fScanlineDecoder->skipScanlines(sampleY - 1);
+                    if (kSuccess != result && kIncompleteInput != result) {
+                        return result;
+                    }
+                }
+                dst = SkTAddOffset<void>(dst, rowBytes);
+            }
+            return kSuccess;
         }
-        storagePtr += Y0 * rowBytes;
-        for (int y = 0; y < dstHeight; y++) {
-            memcpy(dst, storagePtr, rowBytes);
-            storagePtr += sampleY * rowBytes;
-            dst = SkTAddOffset<void>(dst, rowBytes);
+        case SkScanlineDecoder::kBottomUp_SkScanlineOrder:
+        case SkScanlineDecoder::kOutOfOrder_SkScanlineOrder: {
+            for (int y = 0; y < srcHeight; y++) {
+                int srcY = fScanlineDecoder->getY();
+                if (is_coord_necessary(srcY, sampleY, dstHeight)) {
+                    void* dstPtr = SkTAddOffset<void>(dst, rowBytes * get_dst_coord(srcY, sampleY));
+                    result = fScanlineDecoder->getScanlines(dstPtr, 1, rowBytes);
+                    if (kSuccess != result && kIncompleteInput != result) {
+                        return result;
+                    }
+                } else {
+                    result = fScanlineDecoder->skipScanlines(1);
+                    if (kSuccess != result && kIncompleteInput != result) {
+                        return result;
+                    }
+                }
+            }
+            return kSuccess;
         }
-    } else {
-        // does not require post y sampling
-        result = fScanlineDecoder->skipScanlines(Y0);
-        if (kSuccess != result) {
-            return result;
-        }
-        for (int y = 0; y < dstHeight; y++) {
-            result = fScanlineDecoder->getScanlines(dst, 1, rowBytes);
+        case SkScanlineDecoder::kNone_SkScanlineOrder: {
+            SkAutoMalloc storage(srcHeight * rowBytes);
+            uint8_t* storagePtr = static_cast<uint8_t*>(storage.get());
+            result = fScanlineDecoder->getScanlines(storagePtr, srcHeight, rowBytes);
             if (kSuccess != result) {
                 return result;
             }
-            if (y < dstHeight - 1) {
-                result = fScanlineDecoder->skipScanlines(sampleY - 1);
-                if (kSuccess != result) {
-                    return result;
-                }
+            storagePtr += Y0 * rowBytes;
+            for (int y = 0; y < dstHeight; y++) {
+                memcpy(dst, storagePtr, rowBytes);
+                storagePtr += sampleY * rowBytes;
+                dst = SkTAddOffset<void>(dst, rowBytes);
             }
-            dst = SkTAddOffset<void>(dst, rowBytes);
+            return kSuccess;
         }
+        default:
+            SkASSERT(false);
+            return kUnimplemented;
     }
-    return kSuccess;
 }
diff --git a/src/codec/SkScanlineDecoder.cpp b/src/codec/SkScanlineDecoder.cpp
index 859956f..6a8ce30 100644
--- a/src/codec/SkScanlineDecoder.cpp
+++ b/src/codec/SkScanlineDecoder.cpp
@@ -6,6 +6,7 @@
  */
 
 #include "SkScanlineDecoder.h"
+#include "SkBmpCodec.h"
 #include "SkCodec_libpng.h"
 #include "SkCodec_wbmp.h"
 #include "SkCodecPriv.h"
@@ -23,6 +24,7 @@
 #ifndef SK_BUILD_FOR_ANDROID_FRAMEWORK
     { SkJpegCodec::IsJpeg, SkJpegCodec::NewSDFromStream },
 #endif
+    { SkBmpCodec::IsBmp, SkBmpCodec::NewSDFromStream },
     { SkWbmpCodec::IsWbmp, SkWbmpCodec::NewSDFromStream },
 };
 
@@ -94,6 +96,7 @@
 
     fCurrScanline = 0;
     fDstInfo = dstInfo;
+    fOptions = *options;
     return SkCodec::kSuccess;
 }
 
diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp
index 54d9764..a3f1488 100644
--- a/src/codec/SkSwizzler.cpp
+++ b/src/codec/SkSwizzler.cpp
@@ -22,7 +22,7 @@
 
 // samples the row. Does not do anything else but sampling
 static SkSwizzler::ResultAlpha sample565(void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src,
-        int width, int deltaSrc, int offset, const SkPMColor ctable[]){
+        int width, int bpp, int deltaSrc, int offset, const SkPMColor ctable[]){
 
     src += offset;
     uint16_t* SK_RESTRICT dst = (uint16_t*) dstRow;
@@ -34,6 +34,8 @@
     return SkSwizzler::kOpaque_ResultAlpha;
 }
 
+// TODO (msarett): Investigate SIMD optimizations for swizzle routines.
+
 // kBit
 // These routines exclusively choose between white and black
 
@@ -44,7 +46,7 @@
 // same as swizzle_bit_to_index and swizzle_bit_to_n32 except for value assigned to dst[x]
 static SkSwizzler::ResultAlpha swizzle_bit_to_grayscale(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
+        int bpp, int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
 
     uint8_t* SK_RESTRICT dst = (uint8_t*) dstRow;
 
@@ -71,7 +73,7 @@
 // same as swizzle_bit_to_grayscale and swizzle_bit_to_n32 except for value assigned to dst[x]
 static SkSwizzler::ResultAlpha swizzle_bit_to_index(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
+        int bpp, int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
     uint8_t* SK_RESTRICT dst = (uint8_t*) dstRow;
 
     // increment src by byte offset and bitIndex by bit offset
@@ -94,7 +96,7 @@
 // same as swizzle_bit_to_grayscale and swizzle_bit_to_index except for value assigned to dst[x]
 static SkSwizzler::ResultAlpha swizzle_bit_to_n32(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
+        int bpp, int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
     SkPMColor* SK_RESTRICT dst = (SkPMColor*) dstRow;
 
     // increment src by byte offset and bitIndex by bit offset
@@ -119,7 +121,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_bit_to_565(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
+        int bpp, int deltaSrc, int offset, const SkPMColor* /*ctable*/) {
     uint16_t* SK_RESTRICT dst = (uint16_t*) dstRow;
 
     // increment src by byte offset and bitIndex by bit offset
@@ -146,72 +148,72 @@
 
 static SkSwizzler::ResultAlpha swizzle_small_index_to_index(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int bitsPerPixel, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {  // bpp/deltaSrc/offset: bits
 
-    src += offset;
-    uint8_t* SK_RESTRICT dst = (uint8_t*) dstRow;
+    uint8_t* dst = (uint8_t*) dstRow;
     INIT_RESULT_ALPHA;
-    const uint32_t pixelsPerByte = 8 / bitsPerPixel;
-    const size_t rowBytes = compute_row_bytes_ppb(dstWidth, pixelsPerByte);
-    const uint8_t mask = (1 << bitsPerPixel) - 1;
-    int x = 0;
-    for (uint32_t byte = 0; byte < rowBytes; byte++) {
-        uint8_t pixelData = src[byte];
-        for (uint32_t p = 0; p < pixelsPerByte && x < dstWidth; p++) {
-            uint8_t index = (pixelData >> (8 - bitsPerPixel)) & mask;
-            UPDATE_RESULT_ALPHA(ctable[index] >> SK_A32_SHIFT);
-            dst[x] = index;
-            pixelData <<= bitsPerPixel;
-            x++;
-        }
+    src += offset / 8;  // whole bytes of the starting bit offset
+    int bitIndex = offset % 8;  // bit position inside the current byte
+    uint8_t currByte = *src;
+    const uint8_t mask = (1 << bpp) - 1;  // keeps the low bpp bits
+    uint8_t index = (currByte >> (8 - bpp - bitIndex)) & mask;  // bitIndex counts from the MSB
+    dst[0] = index;  // first pixel is written before the loop, which starts at x = 1
+    UPDATE_RESULT_ALPHA(ctable[index] >> SK_A32_SHIFT);
+
+    for (int x = 1; x < dstWidth; x++) {
+        int bitOffset = bitIndex + deltaSrc;  // bit distance from the start of the current byte
+        bitIndex = bitOffset % 8;
+        currByte = *(src += bitOffset / 8);  // advance by whole bytes, then reload
+        index = (currByte >> (8 - bpp - bitIndex)) & mask;
+        dst[x] = index;  // the palette index itself is stored; ctable is only read for alpha
+        UPDATE_RESULT_ALPHA(ctable[index] >> SK_A32_SHIFT);
     }
     return COMPUTE_RESULT_ALPHA;
 }
 
 static SkSwizzler::ResultAlpha swizzle_small_index_to_565(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int bitsPerPixel, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {  // bpp/deltaSrc/offset: bits
 
-    src += offset;
-    uint16_t* SK_RESTRICT dst = (uint16_t*) dstRow;
-    const uint32_t pixelsPerByte = 8 / bitsPerPixel;
-    const size_t rowBytes = compute_row_bytes_ppb(dstWidth, pixelsPerByte);
-    const uint8_t mask = (1 << bitsPerPixel) - 1;
-    int x = 0;
-    for (uint32_t byte = 0; byte < rowBytes; byte++) {
-        uint8_t pixelData = src[byte];
-        for (uint32_t p = 0; p < pixelsPerByte && x < dstWidth; p++) {
-            uint8_t index = (pixelData >> (8 - bitsPerPixel)) & mask;
-            uint16_t c = SkPixel32ToPixel16(ctable[index]);
-            dst[x] = c;
-            pixelData <<= bitsPerPixel;
-            x++;
-        }
+    uint16_t* dst = (uint16_t*) dstRow;
+    src += offset / 8;  // whole bytes of the starting bit offset
+    int bitIndex = offset % 8;  // bit position inside the current byte
+    uint8_t currByte = *src;
+    const uint8_t mask = (1 << bpp) - 1;  // keeps the low bpp bits
+    uint8_t index = (currByte >> (8 - bpp - bitIndex)) & mask;  // bitIndex counts from the MSB
+    dst[0] = SkPixel32ToPixel16(ctable[index]);  // first pixel; the loop starts at x = 1
+
+    for (int x = 1; x < dstWidth; x++) {
+        int bitOffset = bitIndex + deltaSrc;  // bit distance from the start of the current byte
+        bitIndex = bitOffset % 8;
+        currByte = *(src += bitOffset / 8);  // advance by whole bytes, then reload
+        index = (currByte >> (8 - bpp - bitIndex)) & mask;
+        dst[x] = SkPixel32ToPixel16(ctable[index]);  // palette color converted to 16-bit
     }
-    return SkSwizzler::kOpaque_ResultAlpha;
+    return SkSwizzler::kOpaque_ResultAlpha;  // a ResultAlpha constant, not an SkAlphaType value
 }
 
 static SkSwizzler::ResultAlpha swizzle_small_index_to_n32(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int bitsPerPixel, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {  // bpp/deltaSrc/offset: bits
 
-    src += offset;
-    SkPMColor* SK_RESTRICT dst = (SkPMColor*) dstRow;
+    SkPMColor* dst = (SkPMColor*) dstRow;
     INIT_RESULT_ALPHA;
-    const uint32_t pixelsPerByte = 8 / bitsPerPixel;
-    const size_t rowBytes = compute_row_bytes_ppb(dstWidth, pixelsPerByte);
-    const uint8_t mask = (1 << bitsPerPixel) - 1;
-    int x = 0;
-    for (uint32_t byte = 0; byte < rowBytes; byte++) {
-        uint8_t pixelData = src[byte];
-        for (uint32_t p = 0; p < pixelsPerByte && x < dstWidth; p++) {
-            uint8_t index = (pixelData >> (8 - bitsPerPixel)) & mask;
-            SkPMColor c = ctable[index];
-            UPDATE_RESULT_ALPHA(c >> SK_A32_SHIFT);
-            dst[x] = c;
-            pixelData <<= bitsPerPixel;
-            x++;
-        }
+    src += offset / 8;  // whole bytes of the starting bit offset
+    int bitIndex = offset % 8;  // bit position inside the current byte
+    uint8_t currByte = *src;
+    const uint8_t mask = (1 << bpp) - 1;  // keeps the low bpp bits
+    uint8_t index = (currByte >> (8 - bpp - bitIndex)) & mask;  // bitIndex counts from the MSB
+    dst[0] = ctable[index];  // first pixel is written before the loop, which starts at x = 1
+    UPDATE_RESULT_ALPHA(ctable[index] >> SK_A32_SHIFT);
+
+    for (int x = 1; x < dstWidth; x++) {
+        int bitOffset = bitIndex + deltaSrc;  // bit distance from the start of the current byte
+        bitIndex = bitOffset % 8;
+        currByte = *(src += bitOffset / 8);  // advance by whole bytes, then reload
+        index = (currByte >> (8 - bpp - bitIndex)) & mask;
+        dst[x] = ctable[index];  // palette entry is copied through unchanged
+        UPDATE_RESULT_ALPHA(ctable[index] >> SK_A32_SHIFT);
     }
     return COMPUTE_RESULT_ALPHA;
 }
@@ -220,7 +222,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_index_to_index(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     uint8_t* SK_RESTRICT dst = (uint8_t*) dstRow;
@@ -248,7 +250,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_index_to_n32(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -264,7 +266,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_index_to_n32_skipZ(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -282,14 +284,14 @@
 
 static SkSwizzler::ResultAlpha swizzle_index_to_565(
       void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-      int bytesPerPixel, int offset, const SkPMColor ctable[]) {
+      int bytesPerPixel, int deltaSrc, int offset, const SkPMColor ctable[]) {  // NOTE(review): bytesPerPixel is no longer read here; sibling procs name this slot bpp
     // FIXME: Support dithering? Requires knowing y, which I think is a bigger
     // change.
     src += offset;
     uint16_t* SK_RESTRICT dst = (uint16_t*)dstRow;
     for (int x = 0; x < dstWidth; x++) {
         dst[x] = SkPixel32ToPixel16(ctable[*src]);
-        src += bytesPerPixel;
+        src += deltaSrc;  // the stride is now the separate deltaSrc argument, added in bytes
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
@@ -301,7 +303,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_gray_to_n32(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -314,7 +316,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_gray_to_gray(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     uint8_t* SK_RESTRICT dst = (uint8_t*) dstRow;
@@ -331,13 +333,13 @@
 
 static SkSwizzler::ResultAlpha swizzle_gray_to_565(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int bytesPerPixel, int offset, const SkPMColor ctable[]) {
+        int bytesPerPixel, int deltaSrc, int offset, const SkPMColor ctable[]) {  // NOTE(review): bytesPerPixel is no longer read here; sibling procs name this slot bpp
     // FIXME: Support dithering?
     src += offset;
     uint16_t* SK_RESTRICT dst = (uint16_t*)dstRow;
     for (int x = 0; x < dstWidth; x++) {
         dst[x] = SkPack888ToRGB16(src[0], src[0], src[0]);
-        src += bytesPerPixel;
+        src += deltaSrc;  // the stride is now the separate deltaSrc argument, added in bytes
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
@@ -346,7 +348,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_bgrx_to_n32(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -359,7 +361,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_bgrx_to_565(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
     // FIXME: Support dithering?
     src += offset;
     uint16_t* SK_RESTRICT dst = (uint16_t*)dstRow;
@@ -374,7 +376,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_bgra_to_n32_unpremul(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -390,7 +392,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_bgra_to_n32_premul(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -407,7 +409,7 @@
 // kRGBX
 static SkSwizzler::ResultAlpha swizzle_rgbx_to_n32(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -420,13 +422,13 @@
 
 static SkSwizzler::ResultAlpha swizzle_rgbx_to_565(
        void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-       int bytesPerPixel, int offset, const SkPMColor ctable[]) {
+       int bytesPerPixel, int deltaSrc, int offset, const SkPMColor ctable[]) {
     // FIXME: Support dithering?
     src += offset;
     uint16_t* SK_RESTRICT dst = (uint16_t*)dstRow;
     for (int x = 0; x < dstWidth; x++) {
         dst[x] = SkPack888ToRGB16(src[0], src[1], src[2]);
-        src += bytesPerPixel;
+        src += deltaSrc;
     }
     return SkSwizzler::kOpaque_ResultAlpha;
 }
@@ -435,7 +437,7 @@
 // kRGBA
 static SkSwizzler::ResultAlpha swizzle_rgba_to_n32_premul(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -451,7 +453,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_rgba_to_n32_unpremul(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     uint32_t* SK_RESTRICT dst = reinterpret_cast<uint32_t*>(dstRow);
@@ -467,7 +469,7 @@
 
 static SkSwizzler::ResultAlpha swizzle_rgba_to_n32_premul_skipZ(
         void* SK_RESTRICT dstRow, const uint8_t* SK_RESTRICT src, int dstWidth,
-        int deltaSrc, int offset, const SkPMColor ctable[]) {
+        int bpp, int deltaSrc, int offset, const SkPMColor ctable[]) {
 
     src += offset;
     SkPMColor* SK_RESTRICT dst = (SkPMColor*)dstRow;
@@ -700,7 +702,7 @@
     , fDeltaSrc(deltaSrc)
     , fDstInfo(info)
     , fSampleX(sampleX)
-    , fX0(sampleX == 1 ? 0 : sampleX >> 1)
+    , fX0(get_start_coord(sampleX))
 {
     // check that fX0 is less than original width
     SkASSERT(fX0 >= 0 && fX0 < fDstInfo.width() * fSampleX);
@@ -708,11 +710,13 @@
 
 SkSwizzler::ResultAlpha SkSwizzler::swizzle(void* dst, const uint8_t* SK_RESTRICT src) {
     SkASSERT(nullptr != dst && nullptr != src);
-    return fRowProc(dst, src, fDstInfo.width(), fSampleX * fDeltaSrc, fX0 * fDeltaSrc, fColorTable);
+    return fRowProc(dst, src, fDstInfo.width(), fDeltaSrc, fSampleX * fDeltaSrc,
+            fX0 * fDeltaSrc, fColorTable);
 }
 
 void SkSwizzler::Fill(void* dstStartRow, const SkImageInfo& dstInfo, size_t dstRowBytes,
-        uint32_t numRows, uint32_t colorOrIndex, const SkPMColor* colorTable) {
+        uint32_t numRows, uint32_t colorOrIndex, const SkPMColor* colorTable,
+        SkCodec::ZeroInitialized zeroInit) {
     SkASSERT(dstStartRow != nullptr);
     SkASSERT(numRows <= (uint32_t) dstInfo.height());
 
@@ -725,13 +729,17 @@
             // Assume input is an index if we have a color table
             uint32_t color;
             if (nullptr != colorTable) {
-                SkASSERT(colorOrIndex == (uint8_t) colorOrIndex);
-                color = colorTable[colorOrIndex];
+                color = colorTable[(uint8_t) colorOrIndex];
             // Otherwise, assume the input is a color
             } else {
                 color = colorOrIndex;
             }
 
+            // If memory is zero initialized, we may not need to fill
+            if (SkCodec::kYes_ZeroInitialized == zeroInit && 0 == color) {
+                return;
+            }
+
             // We must fill row by row in the case of unaligned row bytes
             if (SkIsAlign4((size_t) dstStartRow) && SkIsAlign4(dstRowBytes)) {
                 sk_memset32((uint32_t*) dstStartRow, color,
@@ -748,20 +756,6 @@
                 }
             }
             break;
-        // On an index destination color type, always assume the input is an index
-        case kIndex_8_SkColorType:
-            SkASSERT(colorOrIndex == (uint8_t) colorOrIndex);
-            memset(dstStartRow, colorOrIndex, bytesToFill);
-            break;
-        case kGray_8_SkColorType:
-            // If the destination is kGray, the caller passes in an 8-bit color.
-            // We will not assert that the high bits of colorOrIndex must be zeroed.
-            // This allows us to take advantage of the fact that the low 8 bits of an
-            // SKPMColor may be a valid a grayscale color.  For example, the low 8
-            // bits of SK_ColorBLACK are identical to the grayscale representation
-            // for black. 
-            memset(dstStartRow, (uint8_t) colorOrIndex, bytesToFill);
-            break;
         case kRGB_565_SkColorType:
             // If the destination is k565, the caller passes in a 16-bit color.
             // We will not assert that the high bits of colorOrIndex must be zeroed.
@@ -769,7 +763,27 @@
             // SKPMColor may be a valid a 565 color.  For example, the low 16
             // bits of SK_ColorBLACK are identical to the 565 representation
             // for black.
-            memset(dstStartRow, (uint16_t) colorOrIndex, bytesToFill);
+            // If we ever want to fill with colorOrIndex != 0, we will probably need
+            // to implement this with sk_memset16().
+            SkASSERT((uint16_t) colorOrIndex == (uint8_t) colorOrIndex);
+            // Fall through
+        case kIndex_8_SkColorType:
+            // On an index destination color type, always assume the input is an index.
+            // Fall through
+        case kGray_8_SkColorType:
+            // If the destination is kGray, the caller passes in an 8-bit color.
+            // We will not assert that the high bits of colorOrIndex must be zeroed.
+            // This allows us to take advantage of the fact that the low 8 bits of an
+            // SkPMColor may be a valid grayscale color.  For example, the low 8
+            // bits of SK_ColorBLACK are identical to the grayscale representation
+            // for black.
+
+            // If memory is zero initialized, we may not need to fill
+            if (SkCodec::kYes_ZeroInitialized == zeroInit && 0 == (uint8_t) colorOrIndex) {
+                return;
+            }
+
+            memset(dstStartRow, (uint8_t) colorOrIndex, bytesToFill);
             break;
         default:
             SkCodecPrintf("Error: Unsupported dst color type for fill().  Doing nothing.\n");
diff --git a/src/codec/SkSwizzler.h b/src/codec/SkSwizzler.h
index 3d57205..a7f29b0 100644
--- a/src/codec/SkSwizzler.h
+++ b/src/codec/SkSwizzler.h
@@ -163,9 +163,13 @@
      *
      * Other SkColorTypes are not supported.
      *
+     * @param zeroInit
+     * Indicates whether memory is already zero initialized.  If so, a zero fill is skipped.
+     *
      */
     static void Fill(void* dstStartRow, const SkImageInfo& dstInfo, size_t dstRowBytes,
-            uint32_t numRows, uint32_t colorOrIndex, const SkPMColor* colorTable);
+            uint32_t numRows, uint32_t colorOrIndex, const SkPMColor* colorTable,
+            SkCodec::ZeroInitialized zeroInit);
 
     /**
      *  Swizzle a line. Generally this will be called height times, once
@@ -188,15 +192,16 @@
      *  @param dstRow Row in which to write the resulting pixels.
      *  @param src Row of src data, in format specified by SrcConfig
      *  @param dstWidth Width in pixels of the destination
-     *  @param deltaSrc if bitsPerPixel % 8 == 0, deltaSrc is bytesPerPixel
-     *                  else, deltaSrc is bitsPerPixel
+     *  @param bpp if bitsPerPixel % 8 == 0, bpp is bytesPerPixel
+     *             else, bpp is bitsPerPixel
+     *  @param deltaSrc bpp * sampleX
      *  @param ctable Colors (used for kIndex source).
      *  @param offset The offset before the first pixel to sample.
                         Is in bytes or bits based on what deltaSrc is in.
      */
     typedef ResultAlpha (*RowProc)(void* SK_RESTRICT dstRow,
                                    const uint8_t* SK_RESTRICT src,
-                                   int dstWidth, int deltaSrc, int offset,
+                                   int dstWidth, int bpp, int deltaSrc, int offset,
                                    const SkPMColor ctable[]);
 
     const RowProc       fRowProc;
diff --git a/tests/CodexTest.cpp b/tests/CodexTest.cpp
index 81d8eb4..27c48fb 100644
--- a/tests/CodexTest.cpp
+++ b/tests/CodexTest.cpp
@@ -158,7 +158,9 @@
             REPORTER_ASSERT(r, result == SkCodec::kSuccess);
         }
         // verify that scanline decoding gives the same result.
-        compare_to_good_digest(r, digest, bm);
+        if (SkScanlineDecoder::kTopDown_SkScanlineOrder == scanlineDecoder->getScanlineOrder()) {
+            compare_to_good_digest(r, digest, bm);
+        }
     } else {
         REPORTER_ASSERT(r, !scanlineDecoder);
     }
@@ -209,7 +211,7 @@
     check(r, "yellow_rose.webp", SkISize::Make(400, 301), false, true);
 
     // BMP
-    check(r, "randPixels.bmp", SkISize::Make(8, 8), false, false);
+    check(r, "randPixels.bmp", SkISize::Make(8, 8), true, false);
 
     // ICO
     // These two tests examine interestingly different behavior:
diff --git a/tests/SwizzlerTest.cpp b/tests/SwizzlerTest.cpp
index 6f0dabe..256a4b2 100644
--- a/tests/SwizzlerTest.cpp
+++ b/tests/SwizzlerTest.cpp
@@ -37,7 +37,7 @@
 
     // Fill image with the fill value starting at the indicated row
     SkSwizzler::Fill(imageStart, imageInfo, rowBytes, endRow - startRow + 1, colorOrIndex,
-            colorTable);
+            colorTable, SkCodec::kNo_ZeroInitialized);
 
     // Ensure that the pixels are filled properly
     // The bots should catch any memory corruption