halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2011 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "SkPDFMakeToUnicodeCmap.h" |
| 9 | #include "SkPDFUtils.h" |
| 10 | #include "SkUtils.h" |
| 11 | |
| 12 | static void append_tounicode_header(SkDynamicMemoryWStream* cmap, |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 13 | bool multibyte) { |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 14 | // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. |
| 15 | // It's there to prevent old version Adobe Readers from malfunctioning. |
| 16 | const char* kHeader = |
| 17 | "/CIDInit /ProcSet findresource begin\n" |
| 18 | "12 dict begin\n" |
| 19 | "begincmap\n"; |
| 20 | cmap->writeText(kHeader); |
| 21 | |
| 22 | // The /CIDSystemInfo must be consistent to the one in |
| 23 | // SkPDFFont::populateCIDFont(). |
| 24 | // We can not pass over the system info object here because the format is |
| 25 | // different. This is not a reference object. |
| 26 | const char* kSysInfo = |
| 27 | "/CIDSystemInfo\n" |
halcanary | 59be20c | 2016-09-01 14:10:00 -0700 | [diff] [blame] | 28 | "<< /Registry (Adobe)\n" |
| 29 | "/Ordering (UCS)\n" |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 30 | "/Supplement 0\n" |
| 31 | ">> def\n"; |
| 32 | cmap->writeText(kSysInfo); |
| 33 | |
| 34 | // The CMapName must be consistent to /CIDSystemInfo above. |
| 35 | // /CMapType 2 means ToUnicode. |
| 36 | // Codespace range just tells the PDF processor the valid range. |
| 37 | const char* kTypeInfoHeader = |
halcanary | 59be20c | 2016-09-01 14:10:00 -0700 | [diff] [blame] | 38 | "/CMapName /Adobe-Identity-UCS def\n" |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 39 | "/CMapType 2 def\n" |
| 40 | "1 begincodespacerange\n"; |
| 41 | cmap->writeText(kTypeInfoHeader); |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 42 | if (multibyte) { |
| 43 | cmap->writeText("<0000> <FFFF>\n"); |
| 44 | } else { |
| 45 | cmap->writeText("<00> <FF>\n"); |
| 46 | } |
| 47 | cmap->writeText("endcodespacerange\n"); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 48 | } |
| 49 | |
| 50 | static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { |
| 51 | const char kFooter[] = |
| 52 | "endcmap\n" |
| 53 | "CMapName currentdict /CMap defineresource pop\n" |
| 54 | "end\n" |
| 55 | "end"; |
| 56 | cmap->writeText(kFooter); |
| 57 | } |
| 58 | |
| 59 | namespace { |
| 60 | struct BFChar { |
| 61 | SkGlyphID fGlyphId; |
| 62 | SkUnichar fUnicode; |
| 63 | }; |
| 64 | |
| 65 | struct BFRange { |
| 66 | SkGlyphID fStart; |
| 67 | SkGlyphID fEnd; |
| 68 | SkUnichar fUnicode; |
| 69 | }; |
| 70 | } // namespace |
| 71 | |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 72 | static void write_glyph(SkDynamicMemoryWStream* cmap, |
| 73 | bool multiByte, |
| 74 | SkGlyphID gid) { |
| 75 | if (multiByte) { |
| 76 | SkPDFUtils::WriteUInt16BE(cmap, gid); |
| 77 | } else { |
| 78 | SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); |
| 79 | } |
| 80 | } |
| 81 | |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 82 | static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 83 | bool multiByte, |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 84 | SkDynamicMemoryWStream* cmap) { |
| 85 | // PDF spec defines that every bf* list can have at most 100 entries. |
| 86 | for (int i = 0; i < bfchar.count(); i += 100) { |
| 87 | int count = bfchar.count() - i; |
| 88 | count = SkMin32(count, 100); |
| 89 | cmap->writeDecAsText(count); |
| 90 | cmap->writeText(" beginbfchar\n"); |
| 91 | for (int j = 0; j < count; ++j) { |
| 92 | cmap->writeText("<"); |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 93 | write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 94 | cmap->writeText("> <"); |
halcanary | f59d18a | 2016-09-16 14:44:57 -0700 | [diff] [blame] | 95 | SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 96 | cmap->writeText(">\n"); |
| 97 | } |
| 98 | cmap->writeText("endbfchar\n"); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 103 | bool multiByte, |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 104 | SkDynamicMemoryWStream* cmap) { |
| 105 | // PDF spec defines that every bf* list can have at most 100 entries. |
| 106 | for (int i = 0; i < bfrange.count(); i += 100) { |
| 107 | int count = bfrange.count() - i; |
| 108 | count = SkMin32(count, 100); |
| 109 | cmap->writeDecAsText(count); |
| 110 | cmap->writeText(" beginbfrange\n"); |
| 111 | for (int j = 0; j < count; ++j) { |
| 112 | cmap->writeText("<"); |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 113 | write_glyph(cmap, multiByte, bfrange[i + j].fStart); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 114 | cmap->writeText("> <"); |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 115 | write_glyph(cmap, multiByte, bfrange[i + j].fEnd); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 116 | cmap->writeText("> <"); |
halcanary | f59d18a | 2016-09-16 14:44:57 -0700 | [diff] [blame] | 117 | SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 118 | cmap->writeText(">\n"); |
| 119 | } |
| 120 | cmap->writeText("endbfrange\n"); |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe |
| 125 | // Technote 5014. |
| 126 | // The function is not static so we can test it in unit tests. |
| 127 | // |
| 128 | // Current implementation guarantees bfchar and bfrange entries do not overlap. |
| 129 | // |
// Current implementation does not attempt aggressive optimizations against the
// following case because the specification is not clear.
| 132 | // |
| 133 | // 4 beginbfchar 1 beginbfchar |
| 134 | // <0003> <0013> <0020> <0014> |
| 135 | // <0005> <0015> to endbfchar |
| 136 | // <0007> <0017> 1 beginbfrange |
| 137 | // <0020> <0014> <0003> <0007> <0013> |
| 138 | // endbfchar endbfrange |
| 139 | // |
| 140 | // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may |
| 141 | // overlap, but succeeding maps supersede preceding maps." |
| 142 | // |
// When searching text in a PDF, bfrange will have higher precedence, so
// typing char id 0x0014 in the search box will get glyph id 0x0004 first.
// However, the spec does not mention how this kind of conflict will be resolved.
| 146 | // |
| 147 | // For the worst case (having 65536 continuous unicode and we use every other |
| 148 | // one of them), the possible savings by aggressive optimization is 416KB |
| 149 | // pre-compressed and does not provide enough motivation for implementation. |
| 150 | void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, |
halcanary | 530032a | 2016-08-18 14:22:52 -0700 | [diff] [blame] | 151 | const SkBitSet* subset, |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 152 | SkDynamicMemoryWStream* cmap, |
| 153 | bool multiByteGlyphs, |
| 154 | SkGlyphID firstGlyphID, |
| 155 | SkGlyphID lastGlyphID) { |
| 156 | if (glyphToUnicode.isEmpty()) { |
| 157 | return; |
| 158 | } |
| 159 | int glyphOffset = 0; |
| 160 | if (!multiByteGlyphs) { |
| 161 | glyphOffset = firstGlyphID - 1; |
| 162 | } |
| 163 | |
| 164 | SkTDArray<BFChar> bfcharEntries; |
| 165 | SkTDArray<BFRange> bfrangeEntries; |
| 166 | |
| 167 | BFRange currentRangeEntry = {0, 0, 0}; |
| 168 | bool rangeEmpty = true; |
| 169 | const int limit = |
| 170 | SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset; |
| 171 | |
| 172 | for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { |
| 173 | bool inSubset = i < limit && |
| 174 | (subset == nullptr || subset->has(i + glyphOffset)); |
| 175 | if (!rangeEmpty) { |
| 176 | // PDF spec requires bfrange not changing the higher byte, |
| 177 | // e.g. <1035> <10FF> <2222> is ok, but |
| 178 | // <1035> <1100> <2222> is no good |
| 179 | bool inRange = |
| 180 | i == currentRangeEntry.fEnd + 1 && |
| 181 | i >> 8 == currentRangeEntry.fStart >> 8 && |
| 182 | i < limit && |
| 183 | glyphToUnicode[i + glyphOffset] == |
| 184 | currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; |
| 185 | if (!inSubset || !inRange) { |
| 186 | if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { |
| 187 | bfrangeEntries.push(currentRangeEntry); |
| 188 | } else { |
| 189 | BFChar* entry = bfcharEntries.append(); |
| 190 | entry->fGlyphId = currentRangeEntry.fStart; |
| 191 | entry->fUnicode = currentRangeEntry.fUnicode; |
| 192 | } |
| 193 | rangeEmpty = true; |
| 194 | } |
| 195 | } |
| 196 | if (inSubset) { |
| 197 | currentRangeEntry.fEnd = i; |
| 198 | if (rangeEmpty) { |
| 199 | currentRangeEntry.fStart = i; |
| 200 | currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset]; |
| 201 | rangeEmpty = false; |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | // The spec requires all bfchar entries for a font must come before bfrange |
| 207 | // entries. |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 208 | append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); |
| 209 | append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 210 | } |
| 211 | |
| 212 | sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( |
| 213 | const SkTDArray<SkUnichar>& glyphToUnicode, |
halcanary | 530032a | 2016-08-18 14:22:52 -0700 | [diff] [blame] | 214 | const SkBitSet* subset, |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 215 | bool multiByteGlyphs, |
| 216 | SkGlyphID firstGlyphID, |
| 217 | SkGlyphID lastGlyphID) { |
| 218 | SkDynamicMemoryWStream cmap; |
halcanary | 3d01c62 | 2016-08-31 12:52:35 -0700 | [diff] [blame] | 219 | append_tounicode_header(&cmap, multiByteGlyphs); |
halcanary | 8eccc30 | 2016-08-09 13:04:34 -0700 | [diff] [blame] | 220 | SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, |
| 221 | firstGlyphID, lastGlyphID); |
| 222 | append_cmap_footer(&cmap); |
| 223 | return sk_make_sp<SkPDFStream>( |
| 224 | std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); |
| 225 | } |