[PDF] Add a ToUnicode mapping for fonts. This makes text in PDFs searchable and copy&paste-able. Code from arthurhsu@chromium.org. Original review: http://codereview.appspot.com/4428082/ Review URL: http://codereview.appspot.com/4525042 git-svn-id: http://skia.googlecode.com/svn/trunk@1280 2bbb7eff-a529-9590-31e7-b0007b416f81

commit: 6744d498fcbbbcf503ec80c4d43dd8f118a88597 [log] [tgz]
author: vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> Mon May 09 18:13:47 2011 +0000
committer: vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> Mon May 09 18:13:47 2011 +0000
tree: 855c458a26e42d13ff29f9aa51266f5e151b9569
parent: 339ac3d0a7650c98de35afbcff4ac1d5b47199c0 [diff]
diff --git a/src/ports/SkFontHost_FreeType.cpp b/src/ports/SkFontHost_FreeType.cpp
index 5ed66c8..b3cc783 100644
--- a/src/ports/SkFontHost_FreeType.cpp
+++ b/src/ports/SkFontHost_FreeType.cpp

@@ -339,6 +339,56 @@
     return true;
 }
 
+static void populate_glyph_to_unicode(FT_Face& face,
+                                      SkTDArray<SkUnichar>* glyphToUnicode) {
+    // Fill glyphToUnicode (indexed by glyph index) from face's Unicode cmaps.
+    for (int i = 0; i < face->num_charmaps; ++i) {
+        // CMaps known to support Unicode:
+        // Platform ID   Encoding ID   Name
+        // -----------   -----------   -----------------------------------
+        // 0             0,1           Apple Unicode
+        // 0             3             Apple Unicode 2.0 (preferred)
+        // 3             1             Microsoft Unicode UCS-2
+        // 3             10            Microsoft Unicode UCS-4 (preferred)
+        // Platform 0 passes with any encoding ID; platform 3 needs 1 or 10.
+        // See Apple TrueType Reference Manual
+        // http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
+        // http://developer.apple.com/fonts/TTRefMan/RM06/Chap6name.html#ID
+        // Microsoft OpenType Specification
+        // http://www.microsoft.com/typography/otspec/cmap.htm
+
+        FT_UShort platformId = face->charmaps[i]->platform_id;
+        FT_UShort encodingId = face->charmaps[i]->encoding_id;
+
+        if (platformId != 0 && platformId != 3) {
+            continue;
+        }
+        if (platformId == 3 && encodingId != 1 && encodingId != 10) {
+            continue;
+        }
+        bool preferredMap = ((platformId == 3 && encodingId == 10) ||
+                             (platformId == 0 && encodingId == 3));
+
+        FT_Set_Charmap(face, face->charmaps[i]);  // return value is ignored
+        if (glyphToUnicode->isEmpty()) {  // size and zero the table once
+            glyphToUnicode->setCount(face->num_glyphs);
+            memset(glyphToUnicode->begin(), 0,
+                   sizeof(SkUnichar) * face->num_glyphs);
+        }
+
+        // Visit each cmap entry; only a preferred map overwrites a filled slot.
+        FT_UInt glyphIndex;
+        for (SkUnichar charCode = FT_Get_First_Char(face, &glyphIndex);
+             glyphIndex != 0;
+             charCode = FT_Get_Next_Char(face, charCode, &glyphIndex)) {
+            if (charCode &&
+                    ((*glyphToUnicode)[glyphIndex] == 0 || preferredMap)) {
+                (*glyphToUnicode)[glyphIndex] = charCode;
+            }
+        }
+    }
+}
+
 // static
 SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics(
         uint32_t fontID,
@@ -509,6 +559,12 @@
         }
     }
 
+    if (perGlyphInfo & SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo &&
+           info->fType != SkAdvancedTypefaceMetrics::kType1_Font &&
+           face->num_charmaps) {
+        populate_glyph_to_unicode(face, &(info->fGlyphToUnicode));
+    }
+
     if (!canEmbed(face))
         info->fType = SkAdvancedTypefaceMetrics::kNotEmbeddable_Font;
 

diff --git a/src/ports/SkFontHost_win.cpp b/src/ports/SkFontHost_win.cpp
old mode 100644
new mode 100755
index bc66010..fd5b929
--- a/src/ports/SkFontHost_win.cpp
+++ b/src/ports/SkFontHost_win.cpp

@@ -194,6 +194,58 @@
     }
 }
 
+// Construct Glyph to Unicode table.
+// Unicode code points that require surrogate pairs in UTF-16 are not
+// supported.
+// TODO(arthurhsu): Add support for surrogate pairs. It looks like that may
+// require parsing the TTF cmap table (platform 4, encoding 12) directly instead
+// of calling GetFontUnicodeRanges().
+static void populate_glyph_to_unicode(HDC fontHdc, const unsigned glyphCount,
+                                      SkTDArray<SkUnichar>* glyphToUnicode) {
+    DWORD glyphSetBufferSize = GetFontUnicodeRanges(fontHdc, NULL);
+    if (!glyphSetBufferSize) {
+        return;
+    }
+
+    SkAutoTDeleteArray<BYTE> glyphSetBuffer(new BYTE[glyphSetBufferSize]);
+    GLYPHSET* glyphSet =
+        reinterpret_cast<LPGLYPHSET>(glyphSetBuffer.get());
+    if (GetFontUnicodeRanges(fontHdc, glyphSet) != glyphSetBufferSize) {
+        return;
+    }
+
+    glyphToUnicode->setCount(glyphCount);
+    memset(glyphToUnicode->begin(), 0, glyphCount * sizeof(SkUnichar));
+    for (DWORD i = 0; i < glyphSet->cRanges; ++i) {
+        // There is no guarantee that within a Unicode range, the corresponding
+        // glyph IDs in the font file are contiguous. So, even if we have ranges,
+        // we can't just use the first and last entry of the range to compute
+        // the result. We need to enumerate them one by one.
+        int count = glyphSet->ranges[i].cGlyphs;
+        SkAutoTArray<WCHAR> chars(count + 1);
+        chars[count] = 0;  // terminate string
+        SkAutoTArray<WORD> glyph(count);
+        for (USHORT j = 0; j < count; ++j) {
+            chars[j] = glyphSet->ranges[i].wcLow + j;
+        }
+        GetGlyphIndicesW(fontHdc, chars.get(), count, glyph.get(),
+                         GGI_MARK_NONEXISTING_GLYPHS);
+        // If the glyph ID is valid, and the glyph is not mapped, then we will
+        // fill in the char id into the vector. If the glyph is mapped already,
+        // skip it.
+        // TODO(arthurhsu): improve this, e.g. get all used char ids from the
+        // font cache, then generate this mapping table from there. It's
+        // unlikely to have collisions since glyph reuse happens mostly for
+        // different Unicode pages.
+        for (USHORT j = 0; j < count; ++j) {
+            if (glyph[j] != 0xffff && glyph[j] < glyphCount &&
+                (*glyphToUnicode)[glyph[j]] == 0) {
+                (*glyphToUnicode)[glyph[j]] = chars[j];
+            }
+        }
+    }
+}
+
 //////////////////////////////////////////////////////////////////////////////////////////////
 
 class SkScalerContext_Windows : public SkScalerContext {
@@ -649,6 +701,10 @@
     info->fFontName.set(lf.lfFaceName);
 #endif
 
+    if (perGlyphInfo & SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo) {
+        populate_glyph_to_unicode(hdc, glyphCount, &(info->fGlyphToUnicode));
+    }
+
     if (otm.otmTextMetrics.tmPitchAndFamily & TMPF_TRUETYPE) {
         info->fType = SkAdvancedTypefaceMetrics::kTrueType_Font;
     } else {
commit	6744d498fcbbbcf503ec80c4d43dd8f118a88597	[log] [tgz]
author	vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>	Mon May 09 18:13:47 2011 +0000
committer	vandebo@chromium.org <vandebo@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>	Mon May 09 18:13:47 2011 +0000
tree	855c458a26e42d13ff29f9aa51266f5e151b9569
parent	339ac3d0a7650c98de35afbcff4ac1d5b47199c0 [diff]