Blame - experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - platform/external/skia

blob: cc5788bfd796ba4ea93dc29b2d9c4fef92e52095 [file] [log] [blame]

edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	1
				2	#include "SkPdfNativeTokenizer.h"
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	3	#include "SkPdfObject.h"
				4	#include "SkPdfConfig.h"
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	5
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	6	#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	7	#include "SkPdfImageDictionary_autogen.h"
				8
				9	// TODO(edisonn): perf!!!
				10	// there could be 0s between start and end! but not in the needle.
				11	static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
				12	int needleLen = strlen(needle);
				13	if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				14	strncmp(hayStart, needle, needleLen) == 0) {
				15	return hayStart;
				16	}
				17
				18	hayStart++;
				19
				20	while (hayStart < hayEnd) {
				21	if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
				22	(isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				23	strncmp(hayStart, needle, needleLen) == 0) {
				24	return hayStart;
				25	}
				26	hayStart++;
				27	}
				28	return NULL;
				29	}
				30
#ifdef PDF_TRACE
// Tracing helpers, compiled in only when PDF_TRACE is defined. They print the tokenizer's
// progress to stdout.

// Starts a new trace line: a running event counter, the kind of item being parsed, and one
// space of indentation per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
    if (id == 478613) {
        // Convenient spot for a debugger breakpoint on one specific trace event.
        printf("break;\n");
    }
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    int remaining = level;
    while (remaining-- > 0) {
        printf(" ");
    }
}

// Prints one character consumed while skipping white space / comments.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Prints one character consumed as part of a token.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Prints the bytes in [start, end) followed by a new line.
static void tracePrintBytes(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cur = start; cur < end; ++cur) {
        printf("%c", *cur);
    }
    printf("\n");
}

// Prints a decoded name.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    tracePrintBytes(start, end);
}

// Prints a decoded literal string.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    tracePrintBytes(start, end);
}

// Prints a decoded hex string.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    tracePrintBytes(start, end);
}

#else
// Tracing disabled: every helper expands to nothing (arguments are not evaluated).
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
				85
				86	static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
				87	TRACE_INDENT(level, "White Space");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	88	while (start < end && isPdfWhiteSpace(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	89	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	90	if (*start == kComment_PdfDelimiter) {
				91	// skip the comment until end of line
				92	while (start < end && !isPdfEOL(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	93	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	94	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	95	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	96	}
				97	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	98	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	99	start++;
				100	}
				101	}
				102	return start;
				103	}
				104
				105	// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	106	static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	107	//int opened brackets
				108	//TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	109	TRACE_INDENT(level, "Token");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	110
				111	SkASSERT(!isPdfWhiteSpace(*start));
				112
				113	if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	114	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	115	start++;
				116	return start;
				117	}
				118
				119	while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	120	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	121	start++;
				122	}
				123	return start;
				124	}
				125
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	126	// last elem has to be ]
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	127	static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
				128	TRACE_INDENT(level, "Array");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	129	while (start < end) {
				130	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	131	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	132
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	133	const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	134
				135	if (endOfToken == start) {
				136	// TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
				137	return start;
				138	}
				139
				140	if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
				141	return endOfToken;
				142	}
				143
				144	SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	145	start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	146	// TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
				147	// we are sure they are not references!
				148	if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
				149	SkPdfObject* gen = array->removeLastInArray();
				150	SkPdfObject* id = array->removeLastInArray();
				151	newObj->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	152	SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	153	}
				154	array->appendInArray(newObj);
				155	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	156	printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	157	// TODO(edisonn): report not reached, we should never get here
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	158	// TODO(edisonn): there might be a bug here, enable an assert and run it on files
				159	// or it might be that the files were actually corrupted
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	160	return start;
				161	}
				162
				163	// When we read strings we will rewrite the string so we will reuse the memory
				164	// when we start to read the string, we already consumed the opened bracket
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	165
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	166	// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer or can reuse the one we have
				167
				168	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				169	TRACE_INDENT(level, "String");
				170	const unsigned char* in = start;
				171	bool hasOut = (out != NULL);
				172
				173	int openRoundBrackets = 1;
				174	while (in < end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	175	openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
				176	openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	177	if (openRoundBrackets == 0) {
				178	in++; // consumed )
				179	break;
				180	}
				181
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	182	if (*in == kEscape_PdfSpecial) {
				183	if (in + 1 < end) {
				184	switch (in[1]) {
				185	case 'n':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	186	if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	187	out++;
				188	in += 2;
				189	break;
				190
				191	case 'r':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	192	if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	193	out++;
				194	in += 2;
				195	break;
				196
				197	case 't':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	198	if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	199	out++;
				200	in += 2;
				201	break;
				202
				203	case 'b':
				204	// TODO(edisonn): any special meaning to backspace?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	205	if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	206	out++;
				207	in += 2;
				208	break;
				209
				210	case 'f':
				211	*out = kFF_PdfWhiteSpace;
				212	out++;
				213	in += 2;
				214	break;
				215
				216	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	217	if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	218	out++;
				219	in += 2;
				220	break;
				221
				222	case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	223	if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	224	out++;
				225	in += 2;
				226	break;
				227
				228	case kEscape_PdfSpecial:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	229	if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	230	out++;
				231	in += 2;
				232	break;
				233
				234	case '0':
				235	case '1':
				236	case '2':
				237	case '3':
				238	case '4':
				239	case '5':
				240	case '6':
				241	case '7': {
				242	//read octals
				243	in++; // consume backslash
				244
				245	int code = 0;
				246	int i = 0;
				247	while (in < end && in >= '0' && in < '8') {
				248	code = (code << 3) + ((in) - '0'); // code 8 + d
				249	i++;
				250	in++;
				251	if (i == 3) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	252	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	253	out++;
				254	i = 0;
				255	}
				256	}
				257	if (i > 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	258	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	259	out++;
				260	}
				261	}
				262	break;
				263
				264	default:
				265	// Per spec, backslash is ignored is escaped ch is unknown
				266	in++;
				267	break;
				268	}
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	269	} else {
				270	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	271	}
				272	} else {
				273	// TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
				274	// we could have one look that first just inc current, and when we find the backslash
				275	// we go to this loop
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	276	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	277	in++;
				278	out++;
				279	}
				280	}
				281
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	282	if (hasOut) {
				283	return in; // consumed already ) at the end of the string
				284	} else {
				285	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				286	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	287	}
				288
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	289	static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
				290	return readString(level, start, end, NULL) - start;
				291	}
				292
				293	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame^]	294	if (!allocator) {
				295	return end;
				296	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	297	int outLength = readStringLength(level, start, end);
				298	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				299	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				300	start = readString(level, start, end, out);
				301	SkPdfObject::makeString(out, out + outLength, str);
				302	TRACE_STRING(out, out + outLength);
				303	return start; // consumed already ) at the end of the string
				304	}
				305
				306	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				307	TRACE_INDENT(level, "HexString");
				308	bool hasOut = (out != NULL);
				309	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	310
				311	unsigned char code = 0;
				312
				313	while (in < end) {
				314	while (in < end && isPdfWhiteSpace(*in)) {
				315	in++;
				316	}
				317
				318	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	319	//*in = '\0';
				320	in++; // consume >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	321	// normal exit
				322	break;
				323	}
				324
				325	if (in >= end) {
				326	// end too soon
				327	break;
				328	}
				329
				330	switch (*in) {
				331	case '0':
				332	case '1':
				333	case '2':
				334	case '3':
				335	case '4':
				336	case '5':
				337	case '6':
				338	case '7':
				339	case '8':
				340	case '9':
				341	code = (*in - '0') << 4;
				342	break;
				343
				344	case 'a':
				345	case 'b':
				346	case 'c':
				347	case 'd':
				348	case 'e':
				349	case 'f':
				350	code = (*in - 'a' + 10) << 4;
				351	break;
				352
				353	case 'A':
				354	case 'B':
				355	case 'C':
				356	case 'D':
				357	case 'E':
				358	case 'F':
				359	code = (*in - 'A' + 10) << 4;
				360	break;
				361
				362	// TODO(edisonn): spec does not say how to handle this error
				363	default:
				364	break;
				365	}
				366
				367	in++; // advance
				368
				369	while (in < end && isPdfWhiteSpace(*in)) {
				370	in++;
				371	}
				372
				373	// TODO(edisonn): report error
				374	if (in >= end) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	375	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	376	out++;
				377	break;
				378	}
				379
				380	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	381	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	382	out++;
				383	break;
				384	}
				385
				386	switch (*in) {
				387	case '0':
				388	case '1':
				389	case '2':
				390	case '3':
				391	case '4':
				392	case '5':
				393	case '6':
				394	case '7':
				395	case '8':
				396	case '9':
				397	code += (*in - '0');
				398	break;
				399
				400	case 'a':
				401	case 'b':
				402	case 'c':
				403	case 'd':
				404	case 'e':
				405	case 'f':
				406	code += (*in - 'a' + 10);
				407	break;
				408
				409	case 'A':
				410	case 'B':
				411	case 'C':
				412	case 'D':
				413	case 'E':
				414	case 'F':
				415	code += (*in - 'A' + 10);
				416	break;
				417
				418	// TODO(edisonn): spec does not say how to handle this error
				419	default:
				420	break;
				421	}
				422
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	423	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	424	out++;
				425	in++;
				426	}
				427
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	428	if (hasOut) {
				429	return in; // consumed already > at the end of the string
				430	} else {
				431	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	432	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	433	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	434
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	435	static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
				436	return readHexString(level, start, end, NULL) - start;
				437	}
				438
				439	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame^]	440	if (!allocator) {
				441	return end;
				442	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	443	int outLength = readHexStringLength(level, start, end);
				444	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				445	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				446	start = readHexString(level, start, end, out);
				447	SkPdfObject::makeHexString(out, out + outLength, str);
				448	TRACE_HEXSTRING(out, out + outLength);
				449	return start; // consumed already > at the end of the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	450	}
				451
				452	// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	453	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				454	TRACE_INDENT(level, "Name");
				455	bool hasOut = (out != NULL);
				456	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	457
				458	unsigned char code = 0;
				459
				460	while (in < end) {
				461	if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
				462	break;
				463	}
				464
				465	if (*in == '#' && in + 2 < end) {
				466	in++;
				467	switch (*in) {
				468	case '0':
				469	case '1':
				470	case '2':
				471	case '3':
				472	case '4':
				473	case '5':
				474	case '6':
				475	case '7':
				476	case '8':
				477	case '9':
				478	code = (*in - '0') << 4;
				479	break;
				480
				481	case 'a':
				482	case 'b':
				483	case 'c':
				484	case 'd':
				485	case 'e':
				486	case 'f':
				487	code = (*in - 'a' + 10) << 4;
				488	break;
				489
				490	case 'A':
				491	case 'B':
				492	case 'C':
				493	case 'D':
				494	case 'E':
				495	case 'F':
				496	code = (*in - 'A' + 10) << 4;
				497	break;
				498
				499	// TODO(edisonn): spec does not say how to handle this error
				500	default:
				501	break;
				502	}
				503
				504	in++; // advance
				505
				506	switch (*in) {
				507	case '0':
				508	case '1':
				509	case '2':
				510	case '3':
				511	case '4':
				512	case '5':
				513	case '6':
				514	case '7':
				515	case '8':
				516	case '9':
				517	code += (*in - '0');
				518	break;
				519
				520	case 'a':
				521	case 'b':
				522	case 'c':
				523	case 'd':
				524	case 'e':
				525	case 'f':
				526	code += (*in - 'a' + 10);
				527	break;
				528
				529	case 'A':
				530	case 'B':
				531	case 'C':
				532	case 'D':
				533	case 'E':
				534	case 'F':
				535	code += (*in - 'A' + 10);
				536	break;
				537
				538	// TODO(edisonn): spec does not say how to handle this error
				539	default:
				540	break;
				541	}
				542
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	543	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	544	out++;
				545	in++;
				546	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	547	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	548	out++;
				549	in++;
				550	}
				551	}
				552
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	553	if (hasOut) {
				554	return in;
				555	} else {
				556	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				557	}
				558	}
				559
				560	static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
				561	return readName(level, start, end, NULL) - start;
				562	}
				563
				564	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame^]	565	if (!allocator) {
				566	return end;
				567	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	568	int outLength = readNameLength(level, start, end);
				569	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				570	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				571	start = readName(level, start, end, out);
				572	SkPdfObject::makeName(out, out + outLength, name);
				573	TRACE_NAME(out, out + outLength);
				574	return start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	575	}
				576
				577	// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
				578	// that makes for an interesting scenario, where the stream itself contains endstream, together
				579	// with a reference object with the length, but the real length object would be somewhere else
				580	// it could confuse the parser
				581	/*example:
				582
				583	7 0 obj
				584	<< /length 8 0 R>>
				585	stream
				586	...............
				587	endstream
				588	8 0 obj #we are in stream actually, not a real object
				589	<< 10 >> #we are in stream actually, not a real object
				590	endobj
				591	endstream
				592	8 0 obj #real obj
				593	<< 100 >> #real obj
				594	endobj
				595	and it could get worse, with multiple object like this
				596	*/
				597
				598	// right now implement the silly algorithm that assumes endstream is finishing the stream
				599
				600
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	601	static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
				602	TRACE_INDENT(level, "Stream");
				603	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	604	if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
				605	// no stream. return.
				606	return start;
				607	}
				608
				609	start += 6; // strlen("stream")
				610	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				611	start += 2;
				612	} else if (start[0] == kLF_PdfWhiteSpace) {
				613	start += 1;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	614	} else if (isPdfWhiteSpace(start[0])) {
				615	start += 1;
				616	} else {
				617	// TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
				618	// TODO(edisonn): warning?
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	619	}
				620
				621	SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
				622	// TODO(edisonn): load Length
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	623	int64_t length = -1;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	624
				625	// TODO(edisonn): very basic implementation
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	626	if (stream->has_Length() && stream->Length(doc) > 0) {
				627	length = stream->Length(doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	628	}
				629
				630	// TODO(edisonn): laod external streams
				631	// TODO(edisonn): look at the last filter, to determione how to deal with possible issue
				632
				633	if (length < 0) {
				634	// scan the buffer, until we find first endstream
				635	// TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	636	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "endstream");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	637
				638	if (endstream) {
				639	length = endstream - start;
				640	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	641	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	642	}
				643	}
				644	if (length >= 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	645	const unsigned char* endstream = start + length;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	646
				647	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				648	endstream += 2;
				649	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				650	endstream += 1;
				651	}
				652
				653	// TODO(edisonn): verify the next bytes are "endstream"
				654
				655	endstream += strlen("endstream");
				656	// TODO(edisonn): Assert? report error/warning?
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	657	dict->addStream(start, (size_t)length);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	658	return endstream;
				659	}
				660	return start;
				661	}
				662
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	663	static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
				664	TRACE_INDENT(level, "Inline Image");
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	665	// We already processed ID keyword, and we should be positioned immediately after it
				666
				667	// TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
				668	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				669	start += 2;
				670	} else if (start[0] == kLF_PdfWhiteSpace) {
				671	start += 1;
				672	} else if (isPdfWhiteSpace(start[0])) {
				673	start += 1;
				674	} else {
				675	SkASSERT(isPdfDelimiter(start[0]));
				676	// TODO(edisonn): warning?
				677	}
				678
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	679	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "EI");
				680	const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	681
				682	if (endstream) {
				683	int length = endstream - start;
				684	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
				685	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
				686	inlineImage->addStream(start, (size_t)length);
				687	} else {
				688	// TODO(edisonn): report error in inline image stream (ID-EI) section
				689	// TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
				690	return end;
				691	}
				692	return endEI;
				693	}
				694
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	695	static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
				696	TRACE_INDENT(level, "Dictionary");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	697	SkPdfObject::makeEmptyDictionary(dict);
				698
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	699	start = skipPdfWhiteSpaces(level, start, end);
				700	SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	701
				702	while (start < end && *start == kNamed_PdfDelimiter) {
				703	SkPdfObject key;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	704	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	705	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	706	start = readName(level + 1, start, end, &key, &tmpStorage);
				707	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	708
				709	if (start < end) {
				710	SkPdfObject* value = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	711	start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	712
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	713	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	714
				715	if (start < end) {
				716	// seems we have an indirect reference
				717	if (isPdfDigit(*start)) {
				718	SkPdfObject generation;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	719	start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	720
				721	SkPdfObject keywordR;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	722	start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	723
				724	if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
				725	int64_t id = value->intValue();
				726	value->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	727	SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	728	dict->set(&key, value);
				729	} else {
				730	// error, ignore
				731	dict->set(&key, value);
				732	}
				733	} else {
				734	// next elem is not a digit, but it might not be / either!
				735	dict->set(&key, value);
				736	}
				737	} else {
				738	// /key >>
				739	dict->set(&key, value);
				740	return end;
				741	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	742	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	743	} else {
				744	dict->set(&key, &SkPdfObject::kNull);
				745	return end;
				746	}
				747	}
				748
				749	// TODO(edisonn): options to ignore these errors
				750
				751	// now we should expect >>
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	752	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	753	if (*start != kClosedInequityBracket_PdfDelimiter) {
				754	// TODO(edisonn): report/warning
				755	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	756	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	757	start++; // skip >
				758	if (*start != kClosedInequityBracket_PdfDelimiter) {
				759	// TODO(edisonn): report/warning
				760	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	761	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	762	start++; // skip >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	763
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	764	start = readStream(level, start, end, dict, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	765
				766	return start;
				767	}
				768
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	769	const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
				770	const unsigned char* current;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	771
				772	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	773	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	774
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	775	current = endOfPdfToken(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	776
				777	// no token, len would be 0
				778	if (current == start) {
				779	return NULL;
				780	}
				781
				782	int tokenLen = current - start;
				783
				784	if (tokenLen == 1) {
				785	// start array
				786	switch (*start) {
				787	case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	788	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	789	SkPdfObject::makeEmptyArray(token);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	790	return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	791
				792	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	793	//*start = '\0';
				794	return readString(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	795
				796	case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	797	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	798	if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	799	//start[1] = '\0'; // optional
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	800	// TODO(edisonn): pass here the length somehow?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	801	return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	802	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	803	return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	804	}
				805
				806	case kNamed_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	807	//*start = '\0';
				808	return readName(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	809
				810	// TODO(edisonn): what to do curly brackets? read spec!
				811	case kOpenedCurlyBracket_PdfDelimiter:
				812	default:
				813	break;
				814	}
				815
				816	SkASSERT(!isPdfWhiteSpace(*start));
				817	if (isPdfDelimiter(*start)) {
				818	// TODO(edisonn): how stream ] } > ) will be handled?
				819	// for now ignore, and it will become a keyword to be ignored
				820	}
				821	}
				822
				823	if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
				824	SkPdfObject::makeNull(token);
				825	return current;
				826	}
				827
				828	if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
				829	SkPdfObject::makeBoolean(true, token);
				830	return current;
				831	}
				832
				833	if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
				834	SkPdfObject::makeBoolean(false, token);
				835	return current;
				836	}
				837
				838	if (isPdfNumeric(*start)) {
				839	SkPdfObject::makeNumeric(start, current, token);
				840	} else {
				841	SkPdfObject::makeKeyword(start, current, token);
				842	}
				843	return current;
				844	}
				845
				846	SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	847	fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	848	return new SkPdfObject[BUFFER_SIZE];
				849	}
				850
				851	SkPdfAllocator::~SkPdfAllocator() {
				852	for (int i = 0 ; i < fHandles.count(); i++) {
				853	free(fHandles[i]);
				854	}
				855	for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	856	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				857	fHistory[i][j].reset();
				858	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	859	delete[] fHistory[i];
				860	}
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	861	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				862	fCurrent[j].reset();
				863	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	864	delete[] fCurrent;
				865	}
				866
				867	SkPdfObject* SkPdfAllocator::allocObject() {
				868	if (fCurrentUsed >= BUFFER_SIZE) {
				869	fHistory.push(fCurrent);
				870	fCurrent = allocBlock();
				871	fCurrentUsed = 0;
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	872	fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	873	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	874	fCurrentUsed++;
				875	return &fCurrent[fCurrentUsed - 1];
				876	}
				877
				878	// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	879	SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	880	const unsigned char* buffer = NULL;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	881	size_t len = 0;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	882	objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	883	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	884	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	885	if (endobj) {
				886	len = endobj - (char*)buffer + strlen("endobj");
				887	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	888	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	889	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	890	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	891
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	892	SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	893	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	894	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	895	if (endobj) {
				896	len = endobj - (char*)buffer + strlen("endobj");
				897	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	898	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	899	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	900	}
				901
				902	SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	903	}
				904
				905	bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
				906	token->fKeyword = NULL;
				907	token->fObject = NULL;
				908
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	909	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	910	if (fUncompressedStream >= fUncompressedStreamEnd) {
				911	return false;
				912	}
				913
				914	SkPdfObject obj;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	915	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	916
				917	// If it is a keyword, we will only get the pointer of the string
				918	if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
				919	token->fKeyword = obj.c_str();
				920	token->fKeywordLength = obj.len();
				921	token->fType = kKeyword_TokenType;
				922	} else {
				923	SkPdfObject* pobj = fAllocator->allocObject();
				924	*pobj = obj;
				925	token->fObject = pobj;
				926	token->fType = kObject_TokenType;
				927	}
				928
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	929	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	930	static int read_op = 0;
				931	read_op++;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	932	if (548 == read_op) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	933	printf("break;\n");
				934	}
				935	printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				936	#endif
				937
				938	return true;
				939	}
				940
				941	void SkPdfNativeTokenizer::PutBack(PdfToken token) {
				942	SkASSERT(!fHasPutBack);
				943	fHasPutBack = true;
				944	fPutBack = token;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	945	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	946	printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
				947	#endif
				948	}
				949
				950	bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
				951	if (fHasPutBack) {
				952	*token = fPutBack;
				953	fHasPutBack = false;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	954	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	955	printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				956	#endif
				957	return true;
				958	}
				959
				960	if (fEmpty) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	961	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	962	printf("EMPTY TOKENIZER\n");
				963	#endif
				964	return false;
				965	}
				966
				967	return readTokenCore(token);
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	968	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	969
				970	#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
				971
				972	// keys
				973	DECLARE_PDF_NAME(BitsPerComponent);
				974	DECLARE_PDF_NAME(ColorSpace);
				975	DECLARE_PDF_NAME(Decode);
				976	DECLARE_PDF_NAME(DecodeParms);
				977	DECLARE_PDF_NAME(Filter);
				978	DECLARE_PDF_NAME(Height);
				979	DECLARE_PDF_NAME(ImageMask);
				980	DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
				981	DECLARE_PDF_NAME(Interpolate);
				982	DECLARE_PDF_NAME(Width);
				983
				984	// values
				985	DECLARE_PDF_NAME(DeviceGray);
				986	DECLARE_PDF_NAME(DeviceRGB);
				987	DECLARE_PDF_NAME(DeviceCMYK);
				988	DECLARE_PDF_NAME(Indexed);
				989	DECLARE_PDF_NAME(ASCIIHexDecode);
				990	DECLARE_PDF_NAME(ASCII85Decode);
				991	DECLARE_PDF_NAME(LZWDecode);
				992	DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
				993	DECLARE_PDF_NAME(RunLengthDecode);
				994	DECLARE_PDF_NAME(CCITTFaxDecode);
				995	DECLARE_PDF_NAME(DCTDecode);
				996
				// If `obj` is the name spelled `shortName`, returns the address of the `longName`
				// object from the enclosing function.
				#define HANDLE_NAME_ABBR(obj,longName,shortName) \
				    if ((obj)->isName(#shortName)) return &(longName);
				998
				999
				1000	static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
				1001	if (!key \|\| !key->isName()) {
				1002	return key;
				1003	}
				1004
				1005	// TODO(edisonn): use autogenerated code!
				1006	HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
				1007	HANDLE_NAME_ABBR(key, ColorSpace, CS);
				1008	HANDLE_NAME_ABBR(key, Decode, D);
				1009	HANDLE_NAME_ABBR(key, DecodeParms, DP);
				1010	HANDLE_NAME_ABBR(key, Filter, F);
				1011	HANDLE_NAME_ABBR(key, Height, H);
				1012	HANDLE_NAME_ABBR(key, ImageMask, IM);
				1013	// HANDLE_NAME_ABBR(key, Intent, );
				1014	HANDLE_NAME_ABBR(key, Interpolate, I);
				1015	HANDLE_NAME_ABBR(key, Width, W);
				1016
				1017	return key;
				1018	}
				1019
				1020	static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
				1021	if (!value \|\| !value->isName()) {
				1022	return value;
				1023	}
				1024
				1025	// TODO(edisonn): use autogenerated code!
				1026	HANDLE_NAME_ABBR(value, DeviceGray, G);
				1027	HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
				1028	HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
				1029	HANDLE_NAME_ABBR(value, Indexed, I);
				1030	HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
				1031	HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
				1032	HANDLE_NAME_ABBR(value, LZWDecode, LZW);
				1033	HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
				1034	HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
				1035	HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
				1036	HANDLE_NAME_ABBR(value, DCTDecode, DCT);
				1037
				1038	return value;
				1039	}
				1040
				1041	SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
				1042	// BI already processed
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1043	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1044	if (fUncompressedStream >= fUncompressedStreamEnd) {
				1045	return NULL;
				1046	}
				1047
				1048	SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
				1049	SkPdfObject::makeEmptyDictionary(inlineImage);
				1050
				1051	while (fUncompressedStream < fUncompressedStreamEnd) {
				1052	SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1053	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1054
				1055	if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1056	fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1057	return inlineImage;
				1058	} else {
				1059	SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1060	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1061	// TODO(edisonn): perf maybe we should not expand abreviation like this
				1062	inlineImage->set(inlineImageKeyAbbreviationExpand(key),
				1063	inlineImageValueAbbreviationExpand(obj));
				1064	}
				1065	}
				1066	// TODO(edisonn): report end of data with inline image without an EI
				1067	return inlineImage;
				1068	}