blob: 42e35f51fb51ae9060895eb1f88872564fc4e819 [file] [log] [blame]
halcanary8eccc302016-08-09 13:04:34 -07001/*
2 * Copyright 2011 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkPDFMakeToUnicodeCmap.h"
Hal Canaryc640d0d2018-06-13 09:59:02 -04009
halcanary8eccc302016-08-09 13:04:34 -070010#include "SkPDFUtils.h"
Hal Canary8b681102018-09-05 22:32:41 -040011#include "SkStreamPriv.h"
Hal Canaryc640d0d2018-06-13 09:59:02 -040012#include "SkTo.h"
Hal Canaryea60b952018-08-21 11:45:46 -040013#include "SkUTF.h"
halcanary8eccc302016-08-09 13:04:34 -070014
15static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
halcanary3d01c622016-08-31 12:52:35 -070016 bool multibyte) {
halcanary8eccc302016-08-09 13:04:34 -070017 // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
18 // It's there to prevent old version Adobe Readers from malfunctioning.
19 const char* kHeader =
20 "/CIDInit /ProcSet findresource begin\n"
21 "12 dict begin\n"
22 "begincmap\n";
23 cmap->writeText(kHeader);
24
25 // The /CIDSystemInfo must be consistent to the one in
26 // SkPDFFont::populateCIDFont().
27 // We can not pass over the system info object here because the format is
28 // different. This is not a reference object.
29 const char* kSysInfo =
30 "/CIDSystemInfo\n"
halcanary59be20c2016-09-01 14:10:00 -070031 "<< /Registry (Adobe)\n"
32 "/Ordering (UCS)\n"
halcanary8eccc302016-08-09 13:04:34 -070033 "/Supplement 0\n"
34 ">> def\n";
35 cmap->writeText(kSysInfo);
36
37 // The CMapName must be consistent to /CIDSystemInfo above.
38 // /CMapType 2 means ToUnicode.
39 // Codespace range just tells the PDF processor the valid range.
40 const char* kTypeInfoHeader =
halcanary59be20c2016-09-01 14:10:00 -070041 "/CMapName /Adobe-Identity-UCS def\n"
halcanary8eccc302016-08-09 13:04:34 -070042 "/CMapType 2 def\n"
43 "1 begincodespacerange\n";
44 cmap->writeText(kTypeInfoHeader);
halcanary3d01c622016-08-31 12:52:35 -070045 if (multibyte) {
46 cmap->writeText("<0000> <FFFF>\n");
47 } else {
48 cmap->writeText("<00> <FF>\n");
49 }
50 cmap->writeText("endcodespacerange\n");
halcanary8eccc302016-08-09 13:04:34 -070051}
52
53static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
54 const char kFooter[] =
55 "endcmap\n"
56 "CMapName currentdict /CMap defineresource pop\n"
57 "end\n"
58 "end";
59 cmap->writeText(kFooter);
60}
61
62namespace {
63struct BFChar {
64 SkGlyphID fGlyphId;
65 SkUnichar fUnicode;
66};
67
68struct BFRange {
69 SkGlyphID fStart;
70 SkGlyphID fEnd;
71 SkUnichar fUnicode;
72};
73} // namespace
74
halcanary3d01c622016-08-31 12:52:35 -070075static void write_glyph(SkDynamicMemoryWStream* cmap,
76 bool multiByte,
77 SkGlyphID gid) {
78 if (multiByte) {
79 SkPDFUtils::WriteUInt16BE(cmap, gid);
80 } else {
81 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
82 }
83}
84
Hal Canary9e41c212018-09-03 12:00:23 -040085static void append_bfchar_section(const std::vector<BFChar>& bfchar,
halcanary3d01c622016-08-31 12:52:35 -070086 bool multiByte,
halcanary8eccc302016-08-09 13:04:34 -070087 SkDynamicMemoryWStream* cmap) {
88 // PDF spec defines that every bf* list can have at most 100 entries.
Hal Canary9e41c212018-09-03 12:00:23 -040089 for (size_t i = 0; i < bfchar.size(); i += 100) {
90 int count = SkToInt(bfchar.size() - i);
halcanary8eccc302016-08-09 13:04:34 -070091 count = SkMin32(count, 100);
Hal Canary8b681102018-09-05 22:32:41 -040092 SkWStreamWriteDecAsText(cmap, count);
halcanary8eccc302016-08-09 13:04:34 -070093 cmap->writeText(" beginbfchar\n");
94 for (int j = 0; j < count; ++j) {
95 cmap->writeText("<");
halcanary3d01c622016-08-31 12:52:35 -070096 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
halcanary8eccc302016-08-09 13:04:34 -070097 cmap->writeText("> <");
halcanaryf59d18a2016-09-16 14:44:57 -070098 SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
halcanary8eccc302016-08-09 13:04:34 -070099 cmap->writeText(">\n");
100 }
101 cmap->writeText("endbfchar\n");
102 }
103}
104
Hal Canary9e41c212018-09-03 12:00:23 -0400105static void append_bfrange_section(const std::vector<BFRange>& bfrange,
halcanary3d01c622016-08-31 12:52:35 -0700106 bool multiByte,
halcanary8eccc302016-08-09 13:04:34 -0700107 SkDynamicMemoryWStream* cmap) {
108 // PDF spec defines that every bf* list can have at most 100 entries.
Hal Canary9e41c212018-09-03 12:00:23 -0400109 for (size_t i = 0; i < bfrange.size(); i += 100) {
110 int count = SkToInt(bfrange.size() - i);
halcanary8eccc302016-08-09 13:04:34 -0700111 count = SkMin32(count, 100);
Hal Canary8b681102018-09-05 22:32:41 -0400112 SkWStreamWriteDecAsText(cmap, count);
halcanary8eccc302016-08-09 13:04:34 -0700113 cmap->writeText(" beginbfrange\n");
114 for (int j = 0; j < count; ++j) {
115 cmap->writeText("<");
halcanary3d01c622016-08-31 12:52:35 -0700116 write_glyph(cmap, multiByte, bfrange[i + j].fStart);
halcanary8eccc302016-08-09 13:04:34 -0700117 cmap->writeText("> <");
halcanary3d01c622016-08-31 12:52:35 -0700118 write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
halcanary8eccc302016-08-09 13:04:34 -0700119 cmap->writeText("> <");
halcanaryf59d18a2016-09-16 14:44:57 -0700120 SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
halcanary8eccc302016-08-09 13:04:34 -0700121 cmap->writeText(">\n");
122 }
123 cmap->writeText("endbfrange\n");
124 }
125}
126
127// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
128// Technote 5014.
129// The function is not static so we can test it in unit tests.
130//
131// Current implementation guarantees bfchar and bfrange entries do not overlap.
132//
133// Current implementation does not attempt aggresive optimizations against
134// following case because the specification is not clear.
135//
136// 4 beginbfchar 1 beginbfchar
137// <0003> <0013> <0020> <0014>
138// <0005> <0015> to endbfchar
139// <0007> <0017> 1 beginbfrange
140// <0020> <0014> <0003> <0007> <0013>
141// endbfchar endbfrange
142//
143// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
144// overlap, but succeeding maps supersede preceding maps."
145//
146// In case of searching text in PDF, bfrange will have higher precedence so
147// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
148// the spec does not mention how will this kind of conflict being resolved.
149//
150// For the worst case (having 65536 continuous unicode and we use every other
151// one of them), the possible savings by aggressive optimization is 416KB
152// pre-compressed and does not provide enough motivation for implementation.
Hal Canary46cc3da2018-05-09 11:50:34 -0400153void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
halcanary530032a2016-08-18 14:22:52 -0700154 const SkBitSet* subset,
halcanary8eccc302016-08-09 13:04:34 -0700155 SkDynamicMemoryWStream* cmap,
156 bool multiByteGlyphs,
157 SkGlyphID firstGlyphID,
158 SkGlyphID lastGlyphID) {
halcanary8eccc302016-08-09 13:04:34 -0700159 int glyphOffset = 0;
160 if (!multiByteGlyphs) {
161 glyphOffset = firstGlyphID - 1;
162 }
163
Hal Canary9e41c212018-09-03 12:00:23 -0400164 std::vector<BFChar> bfcharEntries;
165 std::vector<BFRange> bfrangeEntries;
halcanary8eccc302016-08-09 13:04:34 -0700166
167 BFRange currentRangeEntry = {0, 0, 0};
168 bool rangeEmpty = true;
Hal Canary46cc3da2018-05-09 11:50:34 -0400169 const int limit = (int)lastGlyphID + 1 - glyphOffset;
halcanary8eccc302016-08-09 13:04:34 -0700170
171 for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
172 bool inSubset = i < limit &&
173 (subset == nullptr || subset->has(i + glyphOffset));
174 if (!rangeEmpty) {
175 // PDF spec requires bfrange not changing the higher byte,
176 // e.g. <1035> <10FF> <2222> is ok, but
177 // <1035> <1100> <2222> is no good
178 bool inRange =
179 i == currentRangeEntry.fEnd + 1 &&
180 i >> 8 == currentRangeEntry.fStart >> 8 &&
181 i < limit &&
182 glyphToUnicode[i + glyphOffset] ==
183 currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
184 if (!inSubset || !inRange) {
185 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
Mike Reed5edcd312018-08-08 11:23:41 -0400186 bfrangeEntries.push_back(currentRangeEntry);
halcanary8eccc302016-08-09 13:04:34 -0700187 } else {
Hal Canary9e41c212018-09-03 12:00:23 -0400188 bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
halcanary8eccc302016-08-09 13:04:34 -0700189 }
190 rangeEmpty = true;
191 }
192 }
193 if (inSubset) {
194 currentRangeEntry.fEnd = i;
195 if (rangeEmpty) {
196 currentRangeEntry.fStart = i;
197 currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
198 rangeEmpty = false;
199 }
200 }
201 }
202
203 // The spec requires all bfchar entries for a font must come before bfrange
204 // entries.
halcanary3d01c622016-08-31 12:52:35 -0700205 append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
206 append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
halcanary8eccc302016-08-09 13:04:34 -0700207}
208
209sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
Hal Canary46cc3da2018-05-09 11:50:34 -0400210 const SkUnichar* glyphToUnicode,
halcanary530032a2016-08-18 14:22:52 -0700211 const SkBitSet* subset,
halcanary8eccc302016-08-09 13:04:34 -0700212 bool multiByteGlyphs,
213 SkGlyphID firstGlyphID,
214 SkGlyphID lastGlyphID) {
215 SkDynamicMemoryWStream cmap;
halcanary3d01c622016-08-31 12:52:35 -0700216 append_tounicode_header(&cmap, multiByteGlyphs);
halcanary8eccc302016-08-09 13:04:34 -0700217 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
218 firstGlyphID, lastGlyphID);
219 append_cmap_footer(&cmap);
220 return sk_make_sp<SkPDFStream>(
221 std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
222}