SkPDF: detect YUV-JPEG without relying on ImageGenerator

JPEG/JFIF References:
*   http://www.w3.org/Graphics/JPEG/itu-t81.pdf
*   http://www.w3.org/Graphics/JPEG/jfif3.pdf

BUG=476721
BUG=446940

Review URL: https://codereview.chromium.org/1133443003
diff --git a/gyp/pdf.gypi b/gyp/pdf.gypi
index 68c9897..0c9cb06 100644
--- a/gyp/pdf.gypi
+++ b/gyp/pdf.gypi
@@ -12,6 +12,8 @@
 {
     'sources': [
         '<(skia_src_path)/doc/SkDocument_PDF.cpp',
+        '<(skia_src_path)/pdf/SkJpegInfo.cpp',
+        '<(skia_src_path)/pdf/SkJpegInfo.h',
         '<(skia_src_path)/pdf/SkPDFBitmap.cpp',
         '<(skia_src_path)/pdf/SkPDFBitmap.h',
         '<(skia_src_path)/pdf/SkPDFCanon.cpp',
diff --git a/src/pdf/SkJpegInfo.cpp b/src/pdf/SkJpegInfo.cpp
new file mode 100644
index 0000000..85cd325
--- /dev/null
+++ b/src/pdf/SkJpegInfo.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkData.h"
+#include "SkJpegInfo.h"
+
+#include <string.h>  // memcmp
+
+namespace {
+/**
+ *  Forward-only reader for the marker segments of a JPEG stream (ITU-T T.81,
+ *  Annex B).  Each successful read() consumes one marker and, unless the
+ *  marker stands alone, its length-prefixed payload.  Only a pointer into the
+ *  SkData is kept, so the SkData must outlive the reader.
+ */
+class JpegSegment {
+public:
+    explicit JpegSegment(const SkData* skdata)
+        : fData(static_cast<const char*>(skdata->data()))
+        , fSize(skdata->size())
+        , fOffset(0)
+        , fBuffer(NULL)
+        , fMarker(0)
+        , fLength(0) {}
+
+    /** Advances to the next segment.  Returns false if the data is truncated
+        or malformed; marker(), length() and data() are then meaningless. */
+    bool read() {
+        if (!this->readBigendianUint16(&fMarker)) {
+            return false;
+        }
+        if ((fMarker >> 8) != 0xFF) {
+            return false;  // Every marker begins with an 0xFF byte.
+        }
+        if (JpegSegment::StandAloneMarker(fMarker)) {
+            fLength = 0;
+            fBuffer = NULL;
+            return true;
+        }
+        if (!this->readBigendianUint16(&fLength) || fLength < 2) {
+            return false;
+        }
+        fLength -= 2;  // The length field counts its own two bytes.
+        if (fLength > fSize - fOffset) {
+            return false;  // Segment too long.
+        }
+        fBuffer = &fData[fOffset];
+        fOffset += fLength;
+        return true;
+    }
+
+    /** True for SOF0..SOF15; 0xFFC4 (DHT), 0xFFC8 (JPG) and 0xFFCC (DAC) share
+        the 0xFFCx range but are not frame headers. */
+    bool isSOF() const {
+        return (fMarker & 0xFFF0) == 0xFFC0 && fMarker != 0xFFC4 &&
+               fMarker != 0xFFC8 && fMarker != 0xFFCC;
+    }
+    uint16_t marker() const { return fMarker; }
+    /** Payload size in bytes, excluding the two-byte length field. */
+    uint16_t length() const { return fLength; }
+    /** Payload of the current segment; NULL for stand-alone markers. */
+    const char* data() const { return fBuffer; }
+
+    static uint16_t GetBigendianUint16(const char* ptr) {
+        // "the most significant byte shall come first"
+        return static_cast<uint16_t>((static_cast<uint8_t>(ptr[0]) << 8) |
+                                     static_cast<uint8_t>(ptr[1]));
+    }
+
+private:
+    const char* const fData;
+    const size_t fSize;
+    size_t fOffset;       // Always <= fSize.
+    const char* fBuffer;
+    uint16_t fMarker;
+    uint16_t fLength;
+
+    bool readBigendianUint16(uint16_t* value) {
+        if (fSize - fOffset < 2) {
+            return false;
+        }
+        *value = JpegSegment::GetBigendianUint16(&fData[fOffset]);
+        fOffset += 2;
+        return true;
+    }
+    static bool StandAloneMarker(uint16_t marker) {
+        // RST[m] markers or SOI, EOI, TEM
+        return (marker & 0xFFF8) == 0xFFD0 || marker == 0xFFD8 ||
+               marker == 0xFFD9 || marker == 0xFF01;
+    }
+};
+}  // namespace
+
+bool SkIsJFIF(const SkData* skdata, SkJFIFInfo* info) {
+    static const uint16_t kSOI = 0xFFD8;
+    static const uint16_t kEOI = 0xFFD9;
+    static const uint16_t kSOS = 0xFFDA;
+    static const uint16_t kAPP0 = 0xFFE0;
+    if (!skdata) {
+        return false;
+    }
+    JpegSegment segment(skdata);
+    if (!segment.read() || segment.marker() != kSOI) {
+        return false;  // not a JPEG
+    }
+    if (!segment.read() || segment.marker() != kAPP0) {
+        return false;  // not an APP0 segment
+    }
+    static const char kJfif[] = {'J', 'F', 'I', 'F', '\0'};
+    SkASSERT(segment.data());
+    if (SkToSizeT(segment.length()) < sizeof(kJfif) ||
+        0 != memcmp(segment.data(), kJfif, sizeof(kJfif))) {
+        return false;  // Not JFIF JPEG
+    }
+    do {
+        if (!segment.read()) {
+            return false;  // malformed JPEG
+        }
+        if (segment.marker() == kSOS || segment.marker() == kEOI) {
+            // A frame header must precede the first scan; what follows SOS is
+            // entropy-coded data, not marker segments.
+            return false;
+        }
+    } while (!segment.isSOF());
+    if (segment.length() < 6) {
+        return false;  // SOF segment is short
+    }
+    // Frame header layout: P (1 byte), Y (2), X (2), Nf (1), ...  Read the
+    // single-byte fields as unsigned so values >= 0x80 cannot go negative.
+    const uint8_t* sof = reinterpret_cast<const uint8_t*>(segment.data());
+    if (8 != sof[0]) {
+        return false;  // Only support 8-bit precision
+    }
+    const int numberOfComponents = sof[5];
+    if (numberOfComponents != 1 && numberOfComponents != 3) {
+        return false;  // Invalid JFIF
+    }
+    if (info) {
+        info->fHeight = JpegSegment::GetBigendianUint16(&segment.data()[1]);
+        info->fWidth = JpegSegment::GetBigendianUint16(&segment.data()[3]);
+        if (numberOfComponents == 3) {
+            info->fType = SkJFIFInfo::kYCbCr;
+        } else {
+            info->fType = SkJFIFInfo::kGrayscale;
+        }
+    }
+    return true;
+}
diff --git a/src/pdf/SkJpegInfo.h b/src/pdf/SkJpegInfo.h
new file mode 100644
index 0000000..1be4c0f
--- /dev/null
+++ b/src/pdf/SkJpegInfo.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SkJpegInfo_DEFINED
+#define SkJpegInfo_DEFINED
+
+class SkData;
+
+struct SkJFIFInfo {  // Frame properties filled in by SkIsJFIF().
+    int fWidth;   // Samples per line, read from the frame header.
+    int fHeight;  // Number of lines, read from the frame header.
+    enum Type {
+        kGrayscale,  // Frame header declares one component.
+        kYCbCr,      // Frame header declares three components.
+    } fType;
+};
+
+/** Returns true iff skdata starts with SOI and a "JFIF" APP0 segment and has
+    a frame header with 8-bit precision and 1 or 3 components.  On success,
+    if info is not NULL, populate info.  On failure info is not modified.
+    skdata itself must not be NULL.  JPEG/JFIF References:
+        http://www.w3.org/Graphics/JPEG/itu-t81.pdf
+        http://www.w3.org/Graphics/JPEG/jfif3.pdf
+*/
+bool SkIsJFIF(const SkData* skdata, SkJFIFInfo* info);
+
+#endif  // SkJpegInfo_DEFINED
diff --git a/src/pdf/SkPDFBitmap.cpp b/src/pdf/SkPDFBitmap.cpp
index 29b3bee..4e044a2 100644
--- a/src/pdf/SkPDFBitmap.cpp
+++ b/src/pdf/SkPDFBitmap.cpp
@@ -9,6 +9,7 @@
 #include "SkData.h"
 #include "SkFlate.h"
 #include "SkImageGenerator.h"
+#include "SkJpegInfo.h"
 #include "SkPDFBitmap.h"
 #include "SkPDFCanon.h"
 #include "SkPixelRef.h"
@@ -401,8 +402,9 @@
 class PDFJpegBitmap : public SkPDFBitmap {
 public:
     SkAutoTUnref<SkData> fData;
-    PDFJpegBitmap(const SkBitmap& bm, SkData* data)
-        : SkPDFBitmap(bm), fData(SkRef(data)) {}
+    bool fIsYUV;  // ColorSpace is DeviceRGB if true, else DeviceGray.
+    PDFJpegBitmap(const SkBitmap& bm, SkData* data, bool isYUV)
+        : SkPDFBitmap(bm), fData(SkRef(data)), fIsYUV(isYUV) {}
     void emitObject(SkWStream*,
                     const SkPDFObjNumMap&,
                     const SkPDFSubstituteMap&) override;
@@ -415,7 +417,11 @@
     pdfDict.insertName("Subtype", "Image");
     pdfDict.insertInt("Width", fBitmap.width());
     pdfDict.insertInt("Height", fBitmap.height());
-    pdfDict.insertName("ColorSpace", "DeviceRGB");
+    if (fIsYUV) {
+        pdfDict.insertName("ColorSpace", "DeviceRGB");
+    } else {
+        pdfDict.insertName("ColorSpace", "DeviceGray");
+    }
     pdfDict.insertInt("BitsPerComponent", 8);
     pdfDict.insertName("Filter", "DCTDecode");
     pdfDict.insertInt("ColorTransform", 0);
@@ -429,23 +435,6 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 
-static bool is_jfif_yuv_jpeg(SkData* data) {
-    const uint8_t bytesZeroToThree[] = {0xFF, 0xD8, 0xFF, 0xE0};
-    const uint8_t bytesSixToTen[] = {'J', 'F', 'I', 'F', 0};
-    // 0   1   2   3   4   5   6   7   8   9   10
-    // FF  D8  FF  E0  ??  ??  'J' 'F' 'I' 'F' 00 ...
-    if (data->size() < 11 ||
-        0 != memcmp(data->bytes(), bytesZeroToThree,
-                    sizeof(bytesZeroToThree)) ||
-        0 != memcmp(data->bytes() + 6, bytesSixToTen, sizeof(bytesSixToTen))) {
-        return false;
-    }
-    SkAutoTDelete<SkImageGenerator> gen(SkImageGenerator::NewFromData(data));
-    SkISize sizes[3];
-    // Only YUV JPEG allows access to YUV planes.
-    return gen && gen->getYUV8Planes(sizes, NULL, NULL, NULL);
-}
-
 SkPDFBitmap* SkPDFBitmap::Create(SkPDFCanon* canon, const SkBitmap& bitmap) {
     SkASSERT(canon);
     if (!SkColorTypeIsValid(bitmap.colorType()) ||
@@ -465,8 +454,10 @@
         bm.dimensions() == bm.pixelRef()->info().dimensions()) {
         // Requires the bitmap to be backed by lazy pixels.
         SkAutoTUnref<SkData> data(bm.pixelRef()->refEncodedData());
-        if (data && is_jfif_yuv_jpeg(data)) {
-            SkPDFBitmap* pdfBitmap = SkNEW_ARGS(PDFJpegBitmap, (bm, data));
+        SkJFIFInfo info;
+        if (data && SkIsJFIF(data, &info)) {
+            bool yuv = info.fType == SkJFIFInfo::kYCbCr;
+            SkPDFBitmap* pdfBitmap = SkNEW_ARGS(PDFJpegBitmap, (bm, data, yuv));
             canon->addBitmap(pdfBitmap);
             return pdfBitmap;
         }
diff --git a/tests/PDFJpegEmbedTest.cpp b/tests/PDFJpegEmbedTest.cpp
index 133d84a..cfe6776 100644
--- a/tests/PDFJpegEmbedTest.cpp
+++ b/tests/PDFJpegEmbedTest.cpp
@@ -87,3 +87,42 @@
     // embedded into the PDF directly.
     REPORTER_ASSERT(r, !is_subset_of(cmykData, pdfData));
 }
+
+#include "SkJpegInfo.h"
+
+// Runs SkIsJFIF() over a table of resource images and verifies both the
+// accept/reject decision and, for accepted images, the reported type.
+DEF_TEST(JpegIdentification, r) {
+    struct TestCase {
+        const char* path;
+        bool isJfif;
+        SkJFIFInfo::Type type;  // Only compared when isJfif is true.
+    };
+    static const TestCase kTests[] = {
+        {"CMYK.jpg", false, SkJFIFInfo::kGrayscale},
+        {"color_wheel.jpg", true, SkJFIFInfo::kYCbCr},
+        {"grayscale.jpg", true, SkJFIFInfo::kGrayscale},
+        {"mandrill_512_q075.jpg", true, SkJFIFInfo::kYCbCr},
+        {"randPixels.jpg", true, SkJFIFInfo::kYCbCr},
+    };
+    for (size_t i = 0; i < SK_ARRAY_COUNT(kTests); ++i) {
+        const TestCase& test = kTests[i];
+        SkAutoTUnref<SkData> data(
+                load_resource(r, "JpegIdentification", test.path));
+        if (!data) {
+            continue;  // Nothing was loaded for this entry; skip it.
+        }
+        SkJFIFInfo info;
+        const bool isJfif = SkIsJFIF(data, &info);
+        if (isJfif != test.isJfif) {
+            ERRORF(r, "%s failed isJfif test", test.path);
+        } else if (!isJfif) {
+            // Correctly rejected; there is no info to compare.
+        } else if (test.type != info.fType) {
+            ERRORF(r, "%s failed jfif type test", test.path);
+        } else if (r->verbose()) {
+            SkDebugf("\nJpegIdentification: %s [%d x %d]\n", test.path,
+                     info.fWidth, info.fHeight);
+        }
+    }
+}