blob: afe773207db38d63b2fca55e02dbcaa9820135c1 [file] [log] [blame]
/*
 * Copyright 2011 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
8#include "SkPDFMakeToUnicodeCmap.h"
9#include "SkPDFUtils.h"
10#include "SkUtils.h"
11
12static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
halcanary3d01c622016-08-31 12:52:35 -070013 bool multibyte) {
halcanary8eccc302016-08-09 13:04:34 -070014 // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
15 // It's there to prevent old version Adobe Readers from malfunctioning.
16 const char* kHeader =
17 "/CIDInit /ProcSet findresource begin\n"
18 "12 dict begin\n"
19 "begincmap\n";
20 cmap->writeText(kHeader);
21
22 // The /CIDSystemInfo must be consistent to the one in
23 // SkPDFFont::populateCIDFont().
24 // We can not pass over the system info object here because the format is
25 // different. This is not a reference object.
26 const char* kSysInfo =
27 "/CIDSystemInfo\n"
halcanary59be20c2016-09-01 14:10:00 -070028 "<< /Registry (Adobe)\n"
29 "/Ordering (UCS)\n"
halcanary8eccc302016-08-09 13:04:34 -070030 "/Supplement 0\n"
31 ">> def\n";
32 cmap->writeText(kSysInfo);
33
34 // The CMapName must be consistent to /CIDSystemInfo above.
35 // /CMapType 2 means ToUnicode.
36 // Codespace range just tells the PDF processor the valid range.
37 const char* kTypeInfoHeader =
halcanary59be20c2016-09-01 14:10:00 -070038 "/CMapName /Adobe-Identity-UCS def\n"
halcanary8eccc302016-08-09 13:04:34 -070039 "/CMapType 2 def\n"
40 "1 begincodespacerange\n";
41 cmap->writeText(kTypeInfoHeader);
halcanary3d01c622016-08-31 12:52:35 -070042 if (multibyte) {
43 cmap->writeText("<0000> <FFFF>\n");
44 } else {
45 cmap->writeText("<00> <FF>\n");
46 }
47 cmap->writeText("endcodespacerange\n");
halcanary8eccc302016-08-09 13:04:34 -070048}
49
50static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
51 const char kFooter[] =
52 "endcmap\n"
53 "CMapName currentdict /CMap defineresource pop\n"
54 "end\n"
55 "end";
56 cmap->writeText(kFooter);
57}
58
59namespace {
60struct BFChar {
61 SkGlyphID fGlyphId;
62 SkUnichar fUnicode;
63};
64
65struct BFRange {
66 SkGlyphID fStart;
67 SkGlyphID fEnd;
68 SkUnichar fUnicode;
69};
70} // namespace
71
halcanary3d01c622016-08-31 12:52:35 -070072static void write_glyph(SkDynamicMemoryWStream* cmap,
73 bool multiByte,
74 SkGlyphID gid) {
75 if (multiByte) {
76 SkPDFUtils::WriteUInt16BE(cmap, gid);
77 } else {
78 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
79 }
80}
81
halcanary8eccc302016-08-09 13:04:34 -070082static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
halcanary3d01c622016-08-31 12:52:35 -070083 bool multiByte,
halcanary8eccc302016-08-09 13:04:34 -070084 SkDynamicMemoryWStream* cmap) {
85 // PDF spec defines that every bf* list can have at most 100 entries.
86 for (int i = 0; i < bfchar.count(); i += 100) {
87 int count = bfchar.count() - i;
88 count = SkMin32(count, 100);
89 cmap->writeDecAsText(count);
90 cmap->writeText(" beginbfchar\n");
91 for (int j = 0; j < count; ++j) {
92 cmap->writeText("<");
halcanary3d01c622016-08-31 12:52:35 -070093 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
halcanary8eccc302016-08-09 13:04:34 -070094 cmap->writeText("> <");
halcanaryf59d18a2016-09-16 14:44:57 -070095 SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
halcanary8eccc302016-08-09 13:04:34 -070096 cmap->writeText(">\n");
97 }
98 cmap->writeText("endbfchar\n");
99 }
100}
101
102static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
halcanary3d01c622016-08-31 12:52:35 -0700103 bool multiByte,
halcanary8eccc302016-08-09 13:04:34 -0700104 SkDynamicMemoryWStream* cmap) {
105 // PDF spec defines that every bf* list can have at most 100 entries.
106 for (int i = 0; i < bfrange.count(); i += 100) {
107 int count = bfrange.count() - i;
108 count = SkMin32(count, 100);
109 cmap->writeDecAsText(count);
110 cmap->writeText(" beginbfrange\n");
111 for (int j = 0; j < count; ++j) {
112 cmap->writeText("<");
halcanary3d01c622016-08-31 12:52:35 -0700113 write_glyph(cmap, multiByte, bfrange[i + j].fStart);
halcanary8eccc302016-08-09 13:04:34 -0700114 cmap->writeText("> <");
halcanary3d01c622016-08-31 12:52:35 -0700115 write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
halcanary8eccc302016-08-09 13:04:34 -0700116 cmap->writeText("> <");
halcanaryf59d18a2016-09-16 14:44:57 -0700117 SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
halcanary8eccc302016-08-09 13:04:34 -0700118 cmap->writeText(">\n");
119 }
120 cmap->writeText("endbfrange\n");
121 }
122}
123
124// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
125// Technote 5014.
126// The function is not static so we can test it in unit tests.
127//
128// Current implementation guarantees bfchar and bfrange entries do not overlap.
129//
130// Current implementation does not attempt aggresive optimizations against
131// following case because the specification is not clear.
132//
133// 4 beginbfchar 1 beginbfchar
134// <0003> <0013> <0020> <0014>
135// <0005> <0015> to endbfchar
136// <0007> <0017> 1 beginbfrange
137// <0020> <0014> <0003> <0007> <0013>
138// endbfchar endbfrange
139//
140// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
141// overlap, but succeeding maps supersede preceding maps."
142//
143// In case of searching text in PDF, bfrange will have higher precedence so
144// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
145// the spec does not mention how will this kind of conflict being resolved.
146//
147// For the worst case (having 65536 continuous unicode and we use every other
148// one of them), the possible savings by aggressive optimization is 416KB
149// pre-compressed and does not provide enough motivation for implementation.
150void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
halcanary530032a2016-08-18 14:22:52 -0700151 const SkBitSet* subset,
halcanary8eccc302016-08-09 13:04:34 -0700152 SkDynamicMemoryWStream* cmap,
153 bool multiByteGlyphs,
154 SkGlyphID firstGlyphID,
155 SkGlyphID lastGlyphID) {
156 if (glyphToUnicode.isEmpty()) {
157 return;
158 }
159 int glyphOffset = 0;
160 if (!multiByteGlyphs) {
161 glyphOffset = firstGlyphID - 1;
162 }
163
164 SkTDArray<BFChar> bfcharEntries;
165 SkTDArray<BFRange> bfrangeEntries;
166
167 BFRange currentRangeEntry = {0, 0, 0};
168 bool rangeEmpty = true;
169 const int limit =
170 SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
171
172 for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
173 bool inSubset = i < limit &&
174 (subset == nullptr || subset->has(i + glyphOffset));
175 if (!rangeEmpty) {
176 // PDF spec requires bfrange not changing the higher byte,
177 // e.g. <1035> <10FF> <2222> is ok, but
178 // <1035> <1100> <2222> is no good
179 bool inRange =
180 i == currentRangeEntry.fEnd + 1 &&
181 i >> 8 == currentRangeEntry.fStart >> 8 &&
182 i < limit &&
183 glyphToUnicode[i + glyphOffset] ==
184 currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
185 if (!inSubset || !inRange) {
186 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
187 bfrangeEntries.push(currentRangeEntry);
188 } else {
189 BFChar* entry = bfcharEntries.append();
190 entry->fGlyphId = currentRangeEntry.fStart;
191 entry->fUnicode = currentRangeEntry.fUnicode;
192 }
193 rangeEmpty = true;
194 }
195 }
196 if (inSubset) {
197 currentRangeEntry.fEnd = i;
198 if (rangeEmpty) {
199 currentRangeEntry.fStart = i;
200 currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
201 rangeEmpty = false;
202 }
203 }
204 }
205
206 // The spec requires all bfchar entries for a font must come before bfrange
207 // entries.
halcanary3d01c622016-08-31 12:52:35 -0700208 append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
209 append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
halcanary8eccc302016-08-09 13:04:34 -0700210}
211
212sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
213 const SkTDArray<SkUnichar>& glyphToUnicode,
halcanary530032a2016-08-18 14:22:52 -0700214 const SkBitSet* subset,
halcanary8eccc302016-08-09 13:04:34 -0700215 bool multiByteGlyphs,
216 SkGlyphID firstGlyphID,
217 SkGlyphID lastGlyphID) {
218 SkDynamicMemoryWStream cmap;
halcanary3d01c622016-08-31 12:52:35 -0700219 append_tounicode_header(&cmap, multiByteGlyphs);
halcanary8eccc302016-08-09 13:04:34 -0700220 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
221 firstGlyphID, lastGlyphID);
222 append_cmap_footer(&cmap);
223 return sk_make_sp<SkPDFStream>(
224 std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
225}