Blame - experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - platform/external/skia

blob: 7d8bcb695a645a34e1db60458920631cc969dea7 [file] [log] [blame]

edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	1
				2	#include "SkPdfNativeTokenizer.h"
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	3	#include "SkPdfObject.h"
				4	#include "SkPdfConfig.h"
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	5
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	6	#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	7	#include "SkPdfImageDictionary_autogen.h"
				8
				9	// TODO(edisonn): perf!!!
				10	// there could be 0s between start and end! but not in the needle.
				11	static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
				12	int needleLen = strlen(needle);
				13	if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				14	strncmp(hayStart, needle, needleLen) == 0) {
				15	return hayStart;
				16	}
				17
				18	hayStart++;
				19
				20	while (hayStart < hayEnd) {
				21	if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
				22	(isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				23	strncmp(hayStart, needle, needleLen) == 0) {
				24	return hayStart;
				25	}
				26	hayStart++;
				27	}
				28	return NULL;
				29	}
				30
#ifdef PDF_TRACE
// Starts a new trace line: prints a running call counter and the |type| label, followed by
// |level| spaces of indentation.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
    // NOTE(review): presumably an anchor for a debugger breakpoint at one specific trace id.
    if (id == 478613) {
        printf("break;\n");
    }
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    for (int remaining = level; remaining > 0; --remaining) {
        printf(" ");
    }
}

// Prints one character of a comment/white space run.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Prints one character of a token.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Prints the bytes of a name, [start, end), followed by a new line.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints the bytes of a string, [start, end), followed by a new line.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints the bytes of a hex string, [start, end), followed by a new line.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

#else
// Tracing disabled: every trace call expands to nothing.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
				85
				86	static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
				87	TRACE_INDENT(level, "White Space");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	88	while (start < end && isPdfWhiteSpace(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	89	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	90	if (*start == kComment_PdfDelimiter) {
				91	// skip the comment until end of line
				92	while (start < end && !isPdfEOL(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	93	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	94	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	95	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	96	}
				97	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	98	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	99	start++;
				100	}
				101	}
				102	return start;
				103	}
				104
				105	// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	106	static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	107	//int opened brackets
				108	//TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	109	TRACE_INDENT(level, "Token");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	110
				111	SkASSERT(!isPdfWhiteSpace(*start));
				112
				113	if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	114	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	115	start++;
				116	return start;
				117	}
				118
				119	while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	120	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	121	start++;
				122	}
				123	return start;
				124	}
				125
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	126	// last elem has to be ]
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	127	static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame^]	128	if (allocator == NULL) {
				129	// TODO(edisonn): report/warning error
				130	return end;
				131	}
				132
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	133	TRACE_INDENT(level, "Array");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	134	while (start < end) {
				135	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	136	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	137
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	138	const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	139
				140	if (endOfToken == start) {
				141	// TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
				142	return start;
				143	}
				144
				145	if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
				146	return endOfToken;
				147	}
				148
				149	SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	150	start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	151	// TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
				152	// we are sure they are not references!
				153	if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
				154	SkPdfObject* gen = array->removeLastInArray();
				155	SkPdfObject* id = array->removeLastInArray();
				156	newObj->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	157	SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	158	}
				159	array->appendInArray(newObj);
				160	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	161	printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	162	// TODO(edisonn): report not reached, we should never get here
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	163	// TODO(edisonn): there might be a bug here, enable an assert and run it on files
				164	// or it might be that the files were actually corrupted
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	165	return start;
				166	}
				167
				168	// When we read strings we will rewrite the string so we will reuse the memory
				169	// when we start to read the string, we already consumed the opened bracket
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	170
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	171	// TODO(edisonn): space: add paramater, taht would report if we need to allocate new buffer, or we can reuse the one we have
				172
				173	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				174	TRACE_INDENT(level, "String");
				175	const unsigned char* in = start;
				176	bool hasOut = (out != NULL);
				177
				178	int openRoundBrackets = 1;
				179	while (in < end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	180	openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
				181	openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	182	if (openRoundBrackets == 0) {
				183	in++; // consumed )
				184	break;
				185	}
				186
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	187	if (*in == kEscape_PdfSpecial) {
				188	if (in + 1 < end) {
				189	switch (in[1]) {
				190	case 'n':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	191	if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	192	out++;
				193	in += 2;
				194	break;
				195
				196	case 'r':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	197	if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	198	out++;
				199	in += 2;
				200	break;
				201
				202	case 't':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	203	if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	204	out++;
				205	in += 2;
				206	break;
				207
				208	case 'b':
				209	// TODO(edisonn): any special meaning to backspace?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	210	if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	211	out++;
				212	in += 2;
				213	break;
				214
				215	case 'f':
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame^]	216	if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	217	out++;
				218	in += 2;
				219	break;
				220
				221	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	222	if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	223	out++;
				224	in += 2;
				225	break;
				226
				227	case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	228	if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	229	out++;
				230	in += 2;
				231	break;
				232
				233	case kEscape_PdfSpecial:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	234	if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	235	out++;
				236	in += 2;
				237	break;
				238
				239	case '0':
				240	case '1':
				241	case '2':
				242	case '3':
				243	case '4':
				244	case '5':
				245	case '6':
				246	case '7': {
				247	//read octals
				248	in++; // consume backslash
				249
				250	int code = 0;
				251	int i = 0;
				252	while (in < end && in >= '0' && in < '8') {
				253	code = (code << 3) + ((in) - '0'); // code 8 + d
				254	i++;
				255	in++;
				256	if (i == 3) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	257	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	258	out++;
				259	i = 0;
				260	}
				261	}
				262	if (i > 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	263	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	264	out++;
				265	}
				266	}
				267	break;
				268
				269	default:
				270	// Per spec, backslash is ignored is escaped ch is unknown
				271	in++;
				272	break;
				273	}
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	274	} else {
				275	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	276	}
				277	} else {
				278	// TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
				279	// we could have one look that first just inc current, and when we find the backslash
				280	// we go to this loop
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	281	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	282	in++;
				283	out++;
				284	}
				285	}
				286
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	287	if (hasOut) {
				288	return in; // consumed already ) at the end of the string
				289	} else {
				290	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				291	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	292	}
				293
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	294	static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
				295	return readString(level, start, end, NULL) - start;
				296	}
				297
				298	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	299	if (!allocator) {
				300	return end;
				301	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	302	int outLength = readStringLength(level, start, end);
				303	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				304	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				305	start = readString(level, start, end, out);
				306	SkPdfObject::makeString(out, out + outLength, str);
				307	TRACE_STRING(out, out + outLength);
				308	return start; // consumed already ) at the end of the string
				309	}
				310
				311	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				312	TRACE_INDENT(level, "HexString");
				313	bool hasOut = (out != NULL);
				314	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	315
				316	unsigned char code = 0;
				317
				318	while (in < end) {
				319	while (in < end && isPdfWhiteSpace(*in)) {
				320	in++;
				321	}
				322
				323	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	324	//*in = '\0';
				325	in++; // consume >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	326	// normal exit
				327	break;
				328	}
				329
				330	if (in >= end) {
				331	// end too soon
				332	break;
				333	}
				334
				335	switch (*in) {
				336	case '0':
				337	case '1':
				338	case '2':
				339	case '3':
				340	case '4':
				341	case '5':
				342	case '6':
				343	case '7':
				344	case '8':
				345	case '9':
				346	code = (*in - '0') << 4;
				347	break;
				348
				349	case 'a':
				350	case 'b':
				351	case 'c':
				352	case 'd':
				353	case 'e':
				354	case 'f':
				355	code = (*in - 'a' + 10) << 4;
				356	break;
				357
				358	case 'A':
				359	case 'B':
				360	case 'C':
				361	case 'D':
				362	case 'E':
				363	case 'F':
				364	code = (*in - 'A' + 10) << 4;
				365	break;
				366
				367	// TODO(edisonn): spec does not say how to handle this error
				368	default:
				369	break;
				370	}
				371
				372	in++; // advance
				373
				374	while (in < end && isPdfWhiteSpace(*in)) {
				375	in++;
				376	}
				377
				378	// TODO(edisonn): report error
				379	if (in >= end) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	380	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	381	out++;
				382	break;
				383	}
				384
				385	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	386	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	387	out++;
				388	break;
				389	}
				390
				391	switch (*in) {
				392	case '0':
				393	case '1':
				394	case '2':
				395	case '3':
				396	case '4':
				397	case '5':
				398	case '6':
				399	case '7':
				400	case '8':
				401	case '9':
				402	code += (*in - '0');
				403	break;
				404
				405	case 'a':
				406	case 'b':
				407	case 'c':
				408	case 'd':
				409	case 'e':
				410	case 'f':
				411	code += (*in - 'a' + 10);
				412	break;
				413
				414	case 'A':
				415	case 'B':
				416	case 'C':
				417	case 'D':
				418	case 'E':
				419	case 'F':
				420	code += (*in - 'A' + 10);
				421	break;
				422
				423	// TODO(edisonn): spec does not say how to handle this error
				424	default:
				425	break;
				426	}
				427
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	428	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	429	out++;
				430	in++;
				431	}
				432
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	433	if (hasOut) {
				434	return in; // consumed already > at the end of the string
				435	} else {
				436	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	437	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	438	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	439
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	440	static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
				441	return readHexString(level, start, end, NULL) - start;
				442	}
				443
				444	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	445	if (!allocator) {
				446	return end;
				447	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	448	int outLength = readHexStringLength(level, start, end);
				449	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				450	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				451	start = readHexString(level, start, end, out);
				452	SkPdfObject::makeHexString(out, out + outLength, str);
				453	TRACE_HEXSTRING(out, out + outLength);
				454	return start; // consumed already > at the end of the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	455	}
				456
				457	// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	458	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				459	TRACE_INDENT(level, "Name");
				460	bool hasOut = (out != NULL);
				461	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	462
				463	unsigned char code = 0;
				464
				465	while (in < end) {
				466	if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
				467	break;
				468	}
				469
				470	if (*in == '#' && in + 2 < end) {
				471	in++;
				472	switch (*in) {
				473	case '0':
				474	case '1':
				475	case '2':
				476	case '3':
				477	case '4':
				478	case '5':
				479	case '6':
				480	case '7':
				481	case '8':
				482	case '9':
				483	code = (*in - '0') << 4;
				484	break;
				485
				486	case 'a':
				487	case 'b':
				488	case 'c':
				489	case 'd':
				490	case 'e':
				491	case 'f':
				492	code = (*in - 'a' + 10) << 4;
				493	break;
				494
				495	case 'A':
				496	case 'B':
				497	case 'C':
				498	case 'D':
				499	case 'E':
				500	case 'F':
				501	code = (*in - 'A' + 10) << 4;
				502	break;
				503
				504	// TODO(edisonn): spec does not say how to handle this error
				505	default:
				506	break;
				507	}
				508
				509	in++; // advance
				510
				511	switch (*in) {
				512	case '0':
				513	case '1':
				514	case '2':
				515	case '3':
				516	case '4':
				517	case '5':
				518	case '6':
				519	case '7':
				520	case '8':
				521	case '9':
				522	code += (*in - '0');
				523	break;
				524
				525	case 'a':
				526	case 'b':
				527	case 'c':
				528	case 'd':
				529	case 'e':
				530	case 'f':
				531	code += (*in - 'a' + 10);
				532	break;
				533
				534	case 'A':
				535	case 'B':
				536	case 'C':
				537	case 'D':
				538	case 'E':
				539	case 'F':
				540	code += (*in - 'A' + 10);
				541	break;
				542
				543	// TODO(edisonn): spec does not say how to handle this error
				544	default:
				545	break;
				546	}
				547
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	548	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	549	out++;
				550	in++;
				551	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	552	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	553	out++;
				554	in++;
				555	}
				556	}
				557
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	558	if (hasOut) {
				559	return in;
				560	} else {
				561	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				562	}
				563	}
				564
				565	static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
				566	return readName(level, start, end, NULL) - start;
				567	}
				568
				569	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	570	if (!allocator) {
				571	return end;
				572	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	573	int outLength = readNameLength(level, start, end);
				574	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				575	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				576	start = readName(level, start, end, out);
				577	SkPdfObject::makeName(out, out + outLength, name);
				578	TRACE_NAME(out, out + outLength);
				579	return start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	580	}
				581
// TODO(edisonn): the pdf spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains endstream, together
// with a reference object with the length, but the real length object would be somewhere else.
// It could confuse the parser.
				586	/*example:
				587
				588	7 0 obj
				589	<< /length 8 0 R>>
				590	stream
				591	...............
				592	endstream
				593	8 0 obj #we are in stream actually, not a real object
				594	<< 10 >> #we are in stream actually, not a real object
				595	endobj
				596	endstream
				597	8 0 obj #real obj
				598	<< 100 >> #real obj
				599	endobj
				600	and it could get worse, with multiple object like this
				601	*/
				602
				603	// right now implement the silly algorithm that assumes endstream is finishing the stream
				604
				605
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	606	static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
				607	TRACE_INDENT(level, "Stream");
				608	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	609	if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
				610	// no stream. return.
				611	return start;
				612	}
				613
				614	start += 6; // strlen("stream")
				615	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				616	start += 2;
				617	} else if (start[0] == kLF_PdfWhiteSpace) {
				618	start += 1;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	619	} else if (isPdfWhiteSpace(start[0])) {
				620	start += 1;
				621	} else {
				622	// TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
				623	// TODO(edisonn): warning?
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	624	}
				625
				626	SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
				627	// TODO(edisonn): load Length
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	628	int64_t length = -1;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	629
				630	// TODO(edisonn): very basic implementation
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	631	if (stream->has_Length() && stream->Length(doc) > 0) {
				632	length = stream->Length(doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	633	}
				634
				635	// TODO(edisonn): laod external streams
				636	// TODO(edisonn): look at the last filter, to determione how to deal with possible issue
				637
				638	if (length < 0) {
				639	// scan the buffer, until we find first endstream
				640	// TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	641	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "endstream");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	642
				643	if (endstream) {
				644	length = endstream - start;
				645	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	646	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	647	}
				648	}
				649	if (length >= 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	650	const unsigned char* endstream = start + length;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	651
				652	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				653	endstream += 2;
				654	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				655	endstream += 1;
				656	}
				657
				658	// TODO(edisonn): verify the next bytes are "endstream"
				659
				660	endstream += strlen("endstream");
				661	// TODO(edisonn): Assert? report error/warning?
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	662	dict->addStream(start, (size_t)length);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	663	return endstream;
				664	}
				665	return start;
				666	}
				667
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	668	static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
				669	TRACE_INDENT(level, "Inline Image");
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	670	// We already processed ID keyword, and we should be positioned immediately after it
				671
				672	// TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
				673	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				674	start += 2;
				675	} else if (start[0] == kLF_PdfWhiteSpace) {
				676	start += 1;
				677	} else if (isPdfWhiteSpace(start[0])) {
				678	start += 1;
				679	} else {
				680	SkASSERT(isPdfDelimiter(start[0]));
				681	// TODO(edisonn): warning?
				682	}
				683
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	684	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "EI");
				685	const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	686
				687	if (endstream) {
				688	int length = endstream - start;
				689	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
				690	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
				691	inlineImage->addStream(start, (size_t)length);
				692	} else {
				693	// TODO(edisonn): report error in inline image stream (ID-EI) section
				694	// TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
				695	return end;
				696	}
				697	return endEI;
				698	}
				699
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	700	static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame^]	701	if (allocator == NULL) {
				702	// TODO(edisonn): report/warning error
				703	return end;
				704	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	705	TRACE_INDENT(level, "Dictionary");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	706	SkPdfObject::makeEmptyDictionary(dict);
				707
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	708	start = skipPdfWhiteSpaces(level, start, end);
				709	SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	710
				711	while (start < end && *start == kNamed_PdfDelimiter) {
				712	SkPdfObject key;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	713	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	714	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	715	start = readName(level + 1, start, end, &key, &tmpStorage);
				716	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	717
				718	if (start < end) {
				719	SkPdfObject* value = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	720	start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	721
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	722	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	723
				724	if (start < end) {
				725	// seems we have an indirect reference
				726	if (isPdfDigit(*start)) {
				727	SkPdfObject generation;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	728	start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	729
				730	SkPdfObject keywordR;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	731	start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	732
				733	if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
				734	int64_t id = value->intValue();
				735	value->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	736	SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	737	dict->set(&key, value);
				738	} else {
				739	// error, ignore
				740	dict->set(&key, value);
				741	}
				742	} else {
				743	// next elem is not a digit, but it might not be / either!
				744	dict->set(&key, value);
				745	}
				746	} else {
				747	// /key >>
				748	dict->set(&key, value);
				749	return end;
				750	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	751	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	752	} else {
				753	dict->set(&key, &SkPdfObject::kNull);
				754	return end;
				755	}
				756	}
				757
				758	// TODO(edisonn): options to ignore these errors
				759
				760	// now we should expect >>
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	761	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	762	if (*start != kClosedInequityBracket_PdfDelimiter) {
				763	// TODO(edisonn): report/warning
				764	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	765	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	766	start++; // skip >
				767	if (*start != kClosedInequityBracket_PdfDelimiter) {
				768	// TODO(edisonn): report/warning
				769	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	770	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	771	start++; // skip >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	772
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	773	start = readStream(level, start, end, dict, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	774
				775	return start;
				776	}
				777
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	778	const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
				779	const unsigned char* current;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	780
				781	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	782	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	783
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	784	current = endOfPdfToken(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	785
				786	// no token, len would be 0
				787	if (current == start) {
				788	return NULL;
				789	}
				790
				791	int tokenLen = current - start;
				792
				793	if (tokenLen == 1) {
				794	// start array
				795	switch (*start) {
				796	case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	797	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	798	SkPdfObject::makeEmptyArray(token);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	799	return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	800
				801	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	802	//*start = '\0';
				803	return readString(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	804
				805	case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	806	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	807	if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	808	//start[1] = '\0'; // optional
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	809	// TODO(edisonn): pass here the length somehow?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	810	return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	811	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	812	return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	813	}
				814
				815	case kNamed_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	816	//*start = '\0';
				817	return readName(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	818
				819	// TODO(edisonn): what to do curly brackets? read spec!
				820	case kOpenedCurlyBracket_PdfDelimiter:
				821	default:
				822	break;
				823	}
				824
				825	SkASSERT(!isPdfWhiteSpace(*start));
				826	if (isPdfDelimiter(*start)) {
				827	// TODO(edisonn): how stream ] } > ) will be handled?
				828	// for now ignore, and it will become a keyword to be ignored
				829	}
				830	}
				831
				832	if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
				833	SkPdfObject::makeNull(token);
				834	return current;
				835	}
				836
				837	if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
				838	SkPdfObject::makeBoolean(true, token);
				839	return current;
				840	}
				841
				842	if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
				843	SkPdfObject::makeBoolean(false, token);
				844	return current;
				845	}
				846
				847	if (isPdfNumeric(*start)) {
				848	SkPdfObject::makeNumeric(start, current, token);
				849	} else {
				850	SkPdfObject::makeKeyword(start, current, token);
				851	}
				852	return current;
				853	}
				854
				855	SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	856	fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	857	return new SkPdfObject[BUFFER_SIZE];
				858	}
				859
				860	SkPdfAllocator::~SkPdfAllocator() {
				861	for (int i = 0 ; i < fHandles.count(); i++) {
				862	free(fHandles[i]);
				863	}
				864	for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	865	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				866	fHistory[i][j].reset();
				867	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	868	delete[] fHistory[i];
				869	}
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	870	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				871	fCurrent[j].reset();
				872	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	873	delete[] fCurrent;
				874	}
				875
				876	SkPdfObject* SkPdfAllocator::allocObject() {
				877	if (fCurrentUsed >= BUFFER_SIZE) {
				878	fHistory.push(fCurrent);
				879	fCurrent = allocBlock();
				880	fCurrentUsed = 0;
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	881	fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	882	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	883	fCurrentUsed++;
				884	return &fCurrent[fCurrentUsed - 1];
				885	}
				886
				887	// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	888	SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	889	const unsigned char* buffer = NULL;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	890	size_t len = 0;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	891	objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	892	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	893	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	894	if (endobj) {
				895	len = endobj - (char*)buffer + strlen("endobj");
				896	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	897	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	898	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	899	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	900
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	901	SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	902	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	903	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	904	if (endobj) {
				905	len = endobj - (char*)buffer + strlen("endobj");
				906	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	907	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	908	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	909	}
				910
				911	SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	912	}
				913
				914	bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
				915	token->fKeyword = NULL;
				916	token->fObject = NULL;
				917
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	918	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	919	if (fUncompressedStream >= fUncompressedStreamEnd) {
				920	return false;
				921	}
				922
				923	SkPdfObject obj;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	924	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	925
				926	// If it is a keyword, we will only get the pointer of the string
				927	if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
				928	token->fKeyword = obj.c_str();
				929	token->fKeywordLength = obj.len();
				930	token->fType = kKeyword_TokenType;
				931	} else {
				932	SkPdfObject* pobj = fAllocator->allocObject();
				933	*pobj = obj;
				934	token->fObject = pobj;
				935	token->fType = kObject_TokenType;
				936	}
				937
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	938	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	939	static int read_op = 0;
				940	read_op++;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	941	if (548 == read_op) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	942	printf("break;\n");
				943	}
				944	printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				945	#endif
				946
				947	return true;
				948	}
				949
				950	void SkPdfNativeTokenizer::PutBack(PdfToken token) {
				951	SkASSERT(!fHasPutBack);
				952	fHasPutBack = true;
				953	fPutBack = token;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	954	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	955	printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
				956	#endif
				957	}
				958
				959	bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
				960	if (fHasPutBack) {
				961	*token = fPutBack;
				962	fHasPutBack = false;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	963	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	964	printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				965	#endif
				966	return true;
				967	}
				968
				969	if (fEmpty) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	970	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	971	printf("EMPTY TOKENIZER\n");
				972	#endif
				973	return false;
				974	}
				975
				976	return readTokenCore(token);
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	977	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	978
				979	#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
				980
				981	// keys
				982	DECLARE_PDF_NAME(BitsPerComponent);
				983	DECLARE_PDF_NAME(ColorSpace);
				984	DECLARE_PDF_NAME(Decode);
				985	DECLARE_PDF_NAME(DecodeParms);
				986	DECLARE_PDF_NAME(Filter);
				987	DECLARE_PDF_NAME(Height);
				988	DECLARE_PDF_NAME(ImageMask);
				989	DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
				990	DECLARE_PDF_NAME(Interpolate);
				991	DECLARE_PDF_NAME(Width);
				992
				993	// values
				994	DECLARE_PDF_NAME(DeviceGray);
				995	DECLARE_PDF_NAME(DeviceRGB);
				996	DECLARE_PDF_NAME(DeviceCMYK);
				997	DECLARE_PDF_NAME(Indexed);
				998	DECLARE_PDF_NAME(ASCIIHexDecode);
				999	DECLARE_PDF_NAME(ASCII85Decode);
				1000	DECLARE_PDF_NAME(LZWDecode);
				1001	DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
				1002	DECLARE_PDF_NAME(RunLengthDecode);
				1003	DECLARE_PDF_NAME(CCITTFaxDecode);
				1004	DECLARE_PDF_NAME(DCTDecode);
				1005
				1006	#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
				1007
				1008
				1009	static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
				1010	if (!key \|\| !key->isName()) {
				1011	return key;
				1012	}
				1013
				1014	// TODO(edisonn): use autogenerated code!
				1015	HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
				1016	HANDLE_NAME_ABBR(key, ColorSpace, CS);
				1017	HANDLE_NAME_ABBR(key, Decode, D);
				1018	HANDLE_NAME_ABBR(key, DecodeParms, DP);
				1019	HANDLE_NAME_ABBR(key, Filter, F);
				1020	HANDLE_NAME_ABBR(key, Height, H);
				1021	HANDLE_NAME_ABBR(key, ImageMask, IM);
				1022	// HANDLE_NAME_ABBR(key, Intent, );
				1023	HANDLE_NAME_ABBR(key, Interpolate, I);
				1024	HANDLE_NAME_ABBR(key, Width, W);
				1025
				1026	return key;
				1027	}
				1028
				1029	static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
				1030	if (!value \|\| !value->isName()) {
				1031	return value;
				1032	}
				1033
				1034	// TODO(edisonn): use autogenerated code!
				1035	HANDLE_NAME_ABBR(value, DeviceGray, G);
				1036	HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
				1037	HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
				1038	HANDLE_NAME_ABBR(value, Indexed, I);
				1039	HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
				1040	HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
				1041	HANDLE_NAME_ABBR(value, LZWDecode, LZW);
				1042	HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
				1043	HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
				1044	HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
				1045	HANDLE_NAME_ABBR(value, DCTDecode, DCT);
				1046
				1047	return value;
				1048	}
				1049
				1050	SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
				1051	// BI already processed
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1052	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1053	if (fUncompressedStream >= fUncompressedStreamEnd) {
				1054	return NULL;
				1055	}
				1056
				1057	SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
				1058	SkPdfObject::makeEmptyDictionary(inlineImage);
				1059
				1060	while (fUncompressedStream < fUncompressedStreamEnd) {
				1061	SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1062	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1063
				1064	if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1065	fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1066	return inlineImage;
				1067	} else {
				1068	SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1069	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1070	// TODO(edisonn): perf maybe we should not expand abreviation like this
				1071	inlineImage->set(inlineImageKeyAbbreviationExpand(key),
				1072	inlineImageValueAbbreviationExpand(obj));
				1073	}
				1074	}
				1075	// TODO(edisonn): report end of data with inline image without an EI
				1076	return inlineImage;
				1077	}