Blame - src/pdf/SkPDFMakeToUnicodeCmap.cpp - platform/external/skqp

blob: afe773207db38d63b2fca55e02dbcaa9820135c1 [file] [log] [blame]

halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	1	/*
				2	* Copyright 2011 Google Inc.
				3	*
				4	* Use of this source code is governed by a BSD-style license that can be
				5	* found in the LICENSE file.
				6	*/
				7
				8	#include "SkPDFMakeToUnicodeCmap.h"
				9	#include "SkPDFUtils.h"
				10	#include "SkUtils.h"
				11
				12	static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	13	bool multibyte) {
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	14	// 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
				15	// It's there to prevent old version Adobe Readers from malfunctioning.
				16	const char* kHeader =
				17	"/CIDInit /ProcSet findresource begin\n"
				18	"12 dict begin\n"
				19	"begincmap\n";
				20	cmap->writeText(kHeader);
				21
				22	// The /CIDSystemInfo must be consistent to the one in
				23	// SkPDFFont::populateCIDFont().
				24	// We can not pass over the system info object here because the format is
				25	// different. This is not a reference object.
				26	const char* kSysInfo =
				27	"/CIDSystemInfo\n"
halcanary	59be20c	2016-09-01 14:10:00 -0700	[diff] [blame]	28	"<< /Registry (Adobe)\n"
				29	"/Ordering (UCS)\n"
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	30	"/Supplement 0\n"
				31	">> def\n";
				32	cmap->writeText(kSysInfo);
				33
				34	// The CMapName must be consistent to /CIDSystemInfo above.
				35	// /CMapType 2 means ToUnicode.
				36	// Codespace range just tells the PDF processor the valid range.
				37	const char* kTypeInfoHeader =
halcanary	59be20c	2016-09-01 14:10:00 -0700	[diff] [blame]	38	"/CMapName /Adobe-Identity-UCS def\n"
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	39	"/CMapType 2 def\n"
				40	"1 begincodespacerange\n";
				41	cmap->writeText(kTypeInfoHeader);
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	42	if (multibyte) {
				43	cmap->writeText("<0000> <FFFF>\n");
				44	} else {
				45	cmap->writeText("<00> <FF>\n");
				46	}
				47	cmap->writeText("endcodespacerange\n");
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	48	}
				49
				50	static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
				51	const char kFooter[] =
				52	"endcmap\n"
				53	"CMapName currentdict /CMap defineresource pop\n"
				54	"end\n"
				55	"end";
				56	cmap->writeText(kFooter);
				57	}
				58
				59	namespace {
				60	struct BFChar {
				61	SkGlyphID fGlyphId;
				62	SkUnichar fUnicode;
				63	};
				64
				65	struct BFRange {
				66	SkGlyphID fStart;
				67	SkGlyphID fEnd;
				68	SkUnichar fUnicode;
				69	};
				70	} // namespace
				71
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	72	static void write_glyph(SkDynamicMemoryWStream* cmap,
				73	bool multiByte,
				74	SkGlyphID gid) {
				75	if (multiByte) {
				76	SkPDFUtils::WriteUInt16BE(cmap, gid);
				77	} else {
				78	SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
				79	}
				80	}
				81
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	82	static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	83	bool multiByte,
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	84	SkDynamicMemoryWStream* cmap) {
				85	// PDF spec defines that every bf* list can have at most 100 entries.
				86	for (int i = 0; i < bfchar.count(); i += 100) {
				87	int count = bfchar.count() - i;
				88	count = SkMin32(count, 100);
				89	cmap->writeDecAsText(count);
				90	cmap->writeText(" beginbfchar\n");
				91	for (int j = 0; j < count; ++j) {
				92	cmap->writeText("<");
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	93	write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	94	cmap->writeText("> <");
halcanary	f59d18a	2016-09-16 14:44:57 -0700	[diff] [blame]	95	SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	96	cmap->writeText(">\n");
				97	}
				98	cmap->writeText("endbfchar\n");
				99	}
				100	}
				101
				102	static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	103	bool multiByte,
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	104	SkDynamicMemoryWStream* cmap) {
				105	// PDF spec defines that every bf* list can have at most 100 entries.
				106	for (int i = 0; i < bfrange.count(); i += 100) {
				107	int count = bfrange.count() - i;
				108	count = SkMin32(count, 100);
				109	cmap->writeDecAsText(count);
				110	cmap->writeText(" beginbfrange\n");
				111	for (int j = 0; j < count; ++j) {
				112	cmap->writeText("<");
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	113	write_glyph(cmap, multiByte, bfrange[i + j].fStart);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	114	cmap->writeText("> <");
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	115	write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	116	cmap->writeText("> <");
halcanary	f59d18a	2016-09-16 14:44:57 -0700	[diff] [blame]	117	SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	118	cmap->writeText(">\n");
				119	}
				120	cmap->writeText("endbfrange\n");
				121	}
				122	}
				123
				124	// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
				125	// Technote 5014.
				126	// The function is not static so we can test it in unit tests.
				127	//
				128	// Current implementation guarantees bfchar and bfrange entries do not overlap.
				129	//
				130	// Current implementation does not attempt aggresive optimizations against
				131	// following case because the specification is not clear.
				132	//
				133	// 4 beginbfchar 1 beginbfchar
				134	// <0003> <0013> <0020> <0014>
				135	// <0005> <0015> to endbfchar
				136	// <0007> <0017> 1 beginbfrange
				137	// <0020> <0014> <0003> <0007> <0013>
				138	// endbfchar endbfrange
				139	//
				140	// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
				141	// overlap, but succeeding maps supersede preceding maps."
				142	//
				143	// In case of searching text in PDF, bfrange will have higher precedence so
				144	// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
				145	// the spec does not mention how will this kind of conflict being resolved.
				146	//
				147	// For the worst case (having 65536 continuous unicode and we use every other
				148	// one of them), the possible savings by aggressive optimization is 416KB
				149	// pre-compressed and does not provide enough motivation for implementation.
				150	void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
halcanary	530032a	2016-08-18 14:22:52 -0700	[diff] [blame]	151	const SkBitSet* subset,
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	152	SkDynamicMemoryWStream* cmap,
				153	bool multiByteGlyphs,
				154	SkGlyphID firstGlyphID,
				155	SkGlyphID lastGlyphID) {
				156	if (glyphToUnicode.isEmpty()) {
				157	return;
				158	}
				159	int glyphOffset = 0;
				160	if (!multiByteGlyphs) {
				161	glyphOffset = firstGlyphID - 1;
				162	}
				163
				164	SkTDArray<BFChar> bfcharEntries;
				165	SkTDArray<BFRange> bfrangeEntries;
				166
				167	BFRange currentRangeEntry = {0, 0, 0};
				168	bool rangeEmpty = true;
				169	const int limit =
				170	SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
				171
				172	for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
				173	bool inSubset = i < limit &&
				174	(subset == nullptr \|\| subset->has(i + glyphOffset));
				175	if (!rangeEmpty) {
				176	// PDF spec requires bfrange not changing the higher byte,
				177	// e.g. <1035> <10FF> <2222> is ok, but
				178	// <1035> <1100> <2222> is no good
				179	bool inRange =
				180	i == currentRangeEntry.fEnd + 1 &&
				181	i >> 8 == currentRangeEntry.fStart >> 8 &&
				182	i < limit &&
				183	glyphToUnicode[i + glyphOffset] ==
				184	currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
				185	if (!inSubset \|\| !inRange) {
				186	if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
				187	bfrangeEntries.push(currentRangeEntry);
				188	} else {
				189	BFChar* entry = bfcharEntries.append();
				190	entry->fGlyphId = currentRangeEntry.fStart;
				191	entry->fUnicode = currentRangeEntry.fUnicode;
				192	}
				193	rangeEmpty = true;
				194	}
				195	}
				196	if (inSubset) {
				197	currentRangeEntry.fEnd = i;
				198	if (rangeEmpty) {
				199	currentRangeEntry.fStart = i;
				200	currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
				201	rangeEmpty = false;
				202	}
				203	}
				204	}
				205
				206	// The spec requires all bfchar entries for a font must come before bfrange
				207	// entries.
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	208	append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
				209	append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	210	}
				211
				212	sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
				213	const SkTDArray<SkUnichar>& glyphToUnicode,
halcanary	530032a	2016-08-18 14:22:52 -0700	[diff] [blame]	214	const SkBitSet* subset,
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	215	bool multiByteGlyphs,
				216	SkGlyphID firstGlyphID,
				217	SkGlyphID lastGlyphID) {
				218	SkDynamicMemoryWStream cmap;
halcanary	3d01c62	2016-08-31 12:52:35 -0700	[diff] [blame]	219	append_tounicode_header(&cmap, multiByteGlyphs);
halcanary	8eccc30	2016-08-09 13:04:34 -0700	[diff] [blame]	220	SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
				221	firstGlyphID, lastGlyphID);
				222	append_cmap_footer(&cmap);
				223	return sk_make_sp<SkPDFStream>(
				224	std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
				225	}