Blame - experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - platform/external/skia

blob: 09b7a0b3a88fdaca17f01fafb06a3e46a1c8d72d [file] [log] [blame]

edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	1
				2	#include "SkPdfNativeTokenizer.h"
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	3	#include "SkPdfObject.h"
				4	#include "SkPdfConfig.h"
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	5
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	6	#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	7	#include "SkPdfImageDictionary_autogen.h"
				8
				9	// TODO(edisonn): perf!!!
				10	// there could be 0s between start and end! but not in the needle.
				11	static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
				12	int needleLen = strlen(needle);
				13	if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				14	strncmp(hayStart, needle, needleLen) == 0) {
				15	return hayStart;
				16	}
				17
				18	hayStart++;
				19
				20	while (hayStart < hayEnd) {
				21	if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
				22	(isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				23	strncmp(hayStart, needle, needleLen) == 0) {
				24	return hayStart;
				25	}
				26	hayStart++;
				27	}
				28	return NULL;
				29	}
				30
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	31	#ifdef PDF_TRACE
				32	static void TRACE_INDENT(int level, const char* type) {
				33	static int id = 0;
				34	id++;
				35	if (478613 == id) {
				36	printf("break;\n");
				37	}
				38	// all types should have 2 letters, so the text is alligned nicely
				39	printf("\n%10i %15s: ", id, type);
				40	for (int i = 0 ; i < level; i++) {
				41	printf(" ");
				42	}
				43	}
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	44
// Echoes one byte of skipped white space/comment to the trace output.
static void TRACE_COMMENT(char ch) {
    putchar(ch);
}
				48
				49	static void TRACE_TK(char ch) {
				50	printf("%c", ch);
				51	}
				52
				53	static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
				54	while (start < end) {
				55	printf("%c", *start);
				56	start++;
				57	}
				58	printf("\n");
				59	}
				60
				61	static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
				62	while (start < end) {
				63	printf("%c", *start);
				64	start++;
				65	}
				66	printf("\n");
				67	}
				68
				69	static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
				70	while (start < end) {
				71	printf("%c", *start);
				72	start++;
				73	}
				74	printf("\n");
				75	}
				76
				77	#else
				78	#define TRACE_INDENT(level,type)
				79	#define TRACE_COMMENT(ch)
				80	#define TRACE_TK(ch)
				81	#define TRACE_NAME(start,end)
				82	#define TRACE_STRING(start,end)
				83	#define TRACE_HEXSTRING(start,end)
				84	#endif
				85
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame^]	86	const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	87	TRACE_INDENT(level, "White Space");
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame^]	88	while (start < end && (isPdfWhiteSpace(start) \|\| start == kComment_PdfDelimiter)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	89	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	90	if (*start == kComment_PdfDelimiter) {
				91	// skip the comment until end of line
				92	while (start < end && !isPdfEOL(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	93	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	94	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	95	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	96	}
				97	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	98	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	99	start++;
				100	}
				101	}
				102	return start;
				103	}
				104
				105	// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame^]	106	const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	107	//int opened brackets
				108	//TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	109	TRACE_INDENT(level, "Token");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	110
				111	SkASSERT(!isPdfWhiteSpace(*start));
				112
				113	if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	114	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	115	start++;
				116	return start;
				117	}
				118
				119	while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	120	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	121	start++;
				122	}
				123	return start;
				124	}
				125
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	126	// last elem has to be ]
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	127	static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame]	128	if (allocator == NULL) {
				129	// TODO(edisonn): report/warning error
				130	return end;
				131	}
				132
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	133	TRACE_INDENT(level, "Array");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	134	while (start < end) {
				135	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	136	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	137
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	138	const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	139
				140	if (endOfToken == start) {
				141	// TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
				142	return start;
				143	}
				144
				145	if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
				146	return endOfToken;
				147	}
				148
				149	SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	150	start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	151	// TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
				152	// we are sure they are not references!
				153	if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
				154	SkPdfObject* gen = array->removeLastInArray();
				155	SkPdfObject* id = array->removeLastInArray();
				156	newObj->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	157	SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	158	}
				159	array->appendInArray(newObj);
				160	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	161	printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	162	// TODO(edisonn): report not reached, we should never get here
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	163	// TODO(edisonn): there might be a bug here, enable an assert and run it on files
				164	// or it might be that the files were actually corrupted
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	165	return start;
				166	}
				167
				168	// When we read strings we will rewrite the string so we will reuse the memory
				169	// when we start to read the string, we already consumed the opened bracket
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	170
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	171	// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer or can reuse the one we have
				172
				173	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				174	TRACE_INDENT(level, "String");
				175	const unsigned char* in = start;
				176	bool hasOut = (out != NULL);
				177
				178	int openRoundBrackets = 1;
				179	while (in < end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	180	openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
				181	openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	182	if (openRoundBrackets == 0) {
				183	in++; // consumed )
				184	break;
				185	}
				186
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	187	if (*in == kEscape_PdfSpecial) {
				188	if (in + 1 < end) {
				189	switch (in[1]) {
				190	case 'n':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	191	if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	192	out++;
				193	in += 2;
				194	break;
				195
				196	case 'r':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	197	if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	198	out++;
				199	in += 2;
				200	break;
				201
				202	case 't':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	203	if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	204	out++;
				205	in += 2;
				206	break;
				207
				208	case 'b':
				209	// TODO(edisonn): any special meaning to backspace?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	210	if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	211	out++;
				212	in += 2;
				213	break;
				214
				215	case 'f':
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame]	216	if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	217	out++;
				218	in += 2;
				219	break;
				220
				221	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	222	if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	223	out++;
				224	in += 2;
				225	break;
				226
				227	case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	228	if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	229	out++;
				230	in += 2;
				231	break;
				232
				233	case kEscape_PdfSpecial:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	234	if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	235	out++;
				236	in += 2;
				237	break;
				238
				239	case '0':
				240	case '1':
				241	case '2':
				242	case '3':
				243	case '4':
				244	case '5':
				245	case '6':
				246	case '7': {
				247	//read octals
				248	in++; // consume backslash
				249
				250	int code = 0;
				251	int i = 0;
				252	while (in < end && in >= '0' && in < '8') {
				253	code = (code << 3) + ((in) - '0'); // code 8 + d
				254	i++;
				255	in++;
				256	if (i == 3) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	257	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	258	out++;
				259	i = 0;
				260	}
				261	}
				262	if (i > 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	263	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	264	out++;
				265	}
				266	}
				267	break;
				268
				269	default:
				270	// Per spec, backslash is ignored is escaped ch is unknown
				271	in++;
				272	break;
				273	}
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	274	} else {
				275	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	276	}
				277	} else {
				278	// TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
				279	// we could have one look that first just inc current, and when we find the backslash
				280	// we go to this loop
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	281	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	282	in++;
				283	out++;
				284	}
				285	}
				286
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	287	if (hasOut) {
				288	return in; // consumed already ) at the end of the string
				289	} else {
				290	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				291	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	292	}
				293
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	294	static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
				295	return readString(level, start, end, NULL) - start;
				296	}
				297
				298	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	299	if (!allocator) {
				300	return end;
				301	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	302	int outLength = readStringLength(level, start, end);
				303	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				304	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				305	start = readString(level, start, end, out);
				306	SkPdfObject::makeString(out, out + outLength, str);
				307	TRACE_STRING(out, out + outLength);
				308	return start; // consumed already ) at the end of the string
				309	}
				310
				311	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				312	TRACE_INDENT(level, "HexString");
				313	bool hasOut = (out != NULL);
				314	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	315
				316	unsigned char code = 0;
				317
				318	while (in < end) {
				319	while (in < end && isPdfWhiteSpace(*in)) {
				320	in++;
				321	}
				322
				323	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	324	//*in = '\0';
				325	in++; // consume >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	326	// normal exit
				327	break;
				328	}
				329
				330	if (in >= end) {
				331	// end too soon
				332	break;
				333	}
				334
				335	switch (*in) {
				336	case '0':
				337	case '1':
				338	case '2':
				339	case '3':
				340	case '4':
				341	case '5':
				342	case '6':
				343	case '7':
				344	case '8':
				345	case '9':
				346	code = (*in - '0') << 4;
				347	break;
				348
				349	case 'a':
				350	case 'b':
				351	case 'c':
				352	case 'd':
				353	case 'e':
				354	case 'f':
				355	code = (*in - 'a' + 10) << 4;
				356	break;
				357
				358	case 'A':
				359	case 'B':
				360	case 'C':
				361	case 'D':
				362	case 'E':
				363	case 'F':
				364	code = (*in - 'A' + 10) << 4;
				365	break;
				366
				367	// TODO(edisonn): spec does not say how to handle this error
				368	default:
				369	break;
				370	}
				371
				372	in++; // advance
				373
				374	while (in < end && isPdfWhiteSpace(*in)) {
				375	in++;
				376	}
				377
				378	// TODO(edisonn): report error
				379	if (in >= end) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	380	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	381	out++;
				382	break;
				383	}
				384
				385	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	386	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	387	out++;
edisonn@google.com	1acab36	2013-07-25 22:03:22 +0000	[diff] [blame]	388	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	389	break;
				390	}
				391
				392	switch (*in) {
				393	case '0':
				394	case '1':
				395	case '2':
				396	case '3':
				397	case '4':
				398	case '5':
				399	case '6':
				400	case '7':
				401	case '8':
				402	case '9':
				403	code += (*in - '0');
				404	break;
				405
				406	case 'a':
				407	case 'b':
				408	case 'c':
				409	case 'd':
				410	case 'e':
				411	case 'f':
				412	code += (*in - 'a' + 10);
				413	break;
				414
				415	case 'A':
				416	case 'B':
				417	case 'C':
				418	case 'D':
				419	case 'E':
				420	case 'F':
				421	code += (*in - 'A' + 10);
				422	break;
				423
				424	// TODO(edisonn): spec does not say how to handle this error
				425	default:
				426	break;
				427	}
				428
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	429	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	430	out++;
				431	in++;
				432	}
				433
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	434	if (hasOut) {
				435	return in; // consumed already > at the end of the string
				436	} else {
				437	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	438	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	439	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	440
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	441	static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
				442	return readHexString(level, start, end, NULL) - start;
				443	}
				444
				445	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	446	if (!allocator) {
				447	return end;
				448	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	449	int outLength = readHexStringLength(level, start, end);
				450	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				451	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				452	start = readHexString(level, start, end, out);
				453	SkPdfObject::makeHexString(out, out + outLength, str);
				454	TRACE_HEXSTRING(out, out + outLength);
				455	return start; // consumed already > at the end of the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	456	}
				457
				458	// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	459	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				460	TRACE_INDENT(level, "Name");
				461	bool hasOut = (out != NULL);
				462	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	463
				464	unsigned char code = 0;
				465
				466	while (in < end) {
				467	if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
				468	break;
				469	}
				470
				471	if (*in == '#' && in + 2 < end) {
				472	in++;
				473	switch (*in) {
				474	case '0':
				475	case '1':
				476	case '2':
				477	case '3':
				478	case '4':
				479	case '5':
				480	case '6':
				481	case '7':
				482	case '8':
				483	case '9':
				484	code = (*in - '0') << 4;
				485	break;
				486
				487	case 'a':
				488	case 'b':
				489	case 'c':
				490	case 'd':
				491	case 'e':
				492	case 'f':
				493	code = (*in - 'a' + 10) << 4;
				494	break;
				495
				496	case 'A':
				497	case 'B':
				498	case 'C':
				499	case 'D':
				500	case 'E':
				501	case 'F':
				502	code = (*in - 'A' + 10) << 4;
				503	break;
				504
				505	// TODO(edisonn): spec does not say how to handle this error
				506	default:
				507	break;
				508	}
				509
				510	in++; // advance
				511
				512	switch (*in) {
				513	case '0':
				514	case '1':
				515	case '2':
				516	case '3':
				517	case '4':
				518	case '5':
				519	case '6':
				520	case '7':
				521	case '8':
				522	case '9':
				523	code += (*in - '0');
				524	break;
				525
				526	case 'a':
				527	case 'b':
				528	case 'c':
				529	case 'd':
				530	case 'e':
				531	case 'f':
				532	code += (*in - 'a' + 10);
				533	break;
				534
				535	case 'A':
				536	case 'B':
				537	case 'C':
				538	case 'D':
				539	case 'E':
				540	case 'F':
				541	code += (*in - 'A' + 10);
				542	break;
				543
				544	// TODO(edisonn): spec does not say how to handle this error
				545	default:
				546	break;
				547	}
				548
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	549	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	550	out++;
				551	in++;
				552	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	553	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	554	out++;
				555	in++;
				556	}
				557	}
				558
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	559	if (hasOut) {
				560	return in;
				561	} else {
				562	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				563	}
				564	}
				565
				566	static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
				567	return readName(level, start, end, NULL) - start;
				568	}
				569
				570	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	571	if (!allocator) {
				572	return end;
				573	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	574	int outLength = readNameLength(level, start, end);
				575	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				576	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				577	start = readName(level, start, end, out);
				578	SkPdfObject::makeName(out, out + outLength, name);
				579	TRACE_NAME(out, out + outLength);
				580	return start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	581	}
				582
				583	// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
				584	// that makes for an interesting scenario, where the stream itself contains endstream, together
				585	// with a reference object with the length, but the real length object would be somewhere else
				586	// it could confuse the parser
				587	/*example:
				588
				589	7 0 obj
				590	<< /length 8 0 R>>
				591	stream
				592	...............
				593	endstream
				594	8 0 obj #we are in stream actually, not a real object
				595	<< 10 >> #we are in stream actually, not a real object
				596	endobj
				597	endstream
				598	8 0 obj #real obj
				599	<< 100 >> #real obj
				600	endobj
				601	and it could get worse, with multiple object like this
				602	*/
				603
				604	// right now implement the silly algorithm that assumes endstream is finishing the stream
				605
				606
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	607	static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
				608	TRACE_INDENT(level, "Stream");
				609	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	610	if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
				611	// no stream. return.
				612	return start;
				613	}
				614
				615	start += 6; // strlen("stream")
				616	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				617	start += 2;
				618	} else if (start[0] == kLF_PdfWhiteSpace) {
				619	start += 1;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	620	} else if (isPdfWhiteSpace(start[0])) {
				621	start += 1;
				622	} else {
				623	// TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
				624	// TODO(edisonn): warning?
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	625	}
				626
				627	SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
				628	// TODO(edisonn): load Length
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	629	int64_t length = -1;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	630
				631	// TODO(edisonn): very basic implementation
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	632	if (stream->has_Length() && stream->Length(doc) > 0) {
				633	length = stream->Length(doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	634	}
				635
				636	// TODO(edisonn): laod external streams
				637	// TODO(edisonn): look at the last filter, to determione how to deal with possible issue
				638
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame^]	639
				640	if (length >= 0) {
				641	const unsigned char* endstream = start + length;
				642
				643	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				644	endstream += 2;
				645	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				646	endstream += 1;
				647	}
				648
				649	if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
				650	length = -1;
				651	}
				652	}
				653
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	654	if (length < 0) {
				655	// scan the buffer, until we find first endstream
				656	// TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	657	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "endstream");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	658
				659	if (endstream) {
				660	length = endstream - start;
				661	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	662	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	663	}
				664	}
				665	if (length >= 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	666	const unsigned char* endstream = start + length;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	667
				668	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				669	endstream += 2;
				670	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				671	endstream += 1;
				672	}
				673
				674	// TODO(edisonn): verify the next bytes are "endstream"
				675
				676	endstream += strlen("endstream");
				677	// TODO(edisonn): Assert? report error/warning?
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	678	dict->addStream(start, (size_t)length);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	679	return endstream;
				680	}
				681	return start;
				682	}
				683
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	684	static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
				685	TRACE_INDENT(level, "Inline Image");
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	686	// We already processed ID keyword, and we should be positioned immediately after it
				687
				688	// TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
				689	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				690	start += 2;
				691	} else if (start[0] == kLF_PdfWhiteSpace) {
				692	start += 1;
				693	} else if (isPdfWhiteSpace(start[0])) {
				694	start += 1;
				695	} else {
				696	SkASSERT(isPdfDelimiter(start[0]));
				697	// TODO(edisonn): warning?
				698	}
				699
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	700	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "EI");
				701	const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	702
				703	if (endstream) {
				704	int length = endstream - start;
				705	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
				706	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
				707	inlineImage->addStream(start, (size_t)length);
				708	} else {
				709	// TODO(edisonn): report error in inline image stream (ID-EI) section
				710	// TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
				711	return end;
				712	}
				713	return endEI;
				714	}
				715
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	716	static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame]	717	if (allocator == NULL) {
				718	// TODO(edisonn): report/warning error
				719	return end;
				720	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	721	TRACE_INDENT(level, "Dictionary");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	722	SkPdfObject::makeEmptyDictionary(dict);
				723
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	724	start = skipPdfWhiteSpaces(level, start, end);
				725	SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	726
				727	while (start < end && *start == kNamed_PdfDelimiter) {
				728	SkPdfObject key;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	729	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	730	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	731	start = readName(level + 1, start, end, &key, &tmpStorage);
				732	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	733
				734	if (start < end) {
				735	SkPdfObject* value = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	736	start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	737
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	738	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	739
				740	if (start < end) {
				741	// seems we have an indirect reference
				742	if (isPdfDigit(*start)) {
				743	SkPdfObject generation;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	744	start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	745
				746	SkPdfObject keywordR;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	747	start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	748
				749	if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
				750	int64_t id = value->intValue();
				751	value->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	752	SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	753	dict->set(&key, value);
				754	} else {
				755	// error, ignore
				756	dict->set(&key, value);
				757	}
				758	} else {
				759	// next elem is not a digit, but it might not be / either!
				760	dict->set(&key, value);
				761	}
				762	} else {
				763	// /key >>
				764	dict->set(&key, value);
				765	return end;
				766	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	767	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	768	} else {
				769	dict->set(&key, &SkPdfObject::kNull);
				770	return end;
				771	}
				772	}
				773
				774	// TODO(edisonn): options to ignore these errors
				775
				776	// now we should expect >>
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	777	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	778	if (*start != kClosedInequityBracket_PdfDelimiter) {
				779	// TODO(edisonn): report/warning
				780	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	781	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	782	start++; // skip >
				783	if (*start != kClosedInequityBracket_PdfDelimiter) {
				784	// TODO(edisonn): report/warning
				785	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	786	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	787	start++; // skip >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	788
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	789	start = readStream(level, start, end, dict, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	790
				791	return start;
				792	}
				793
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	794	const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
				795	const unsigned char* current;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	796
				797	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	798	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	799
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	800	current = endOfPdfToken(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	801
				802	// no token, len would be 0
				803	if (current == start) {
				804	return NULL;
				805	}
				806
				807	int tokenLen = current - start;
				808
				809	if (tokenLen == 1) {
				810	// start array
				811	switch (*start) {
				812	case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	813	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	814	SkPdfObject::makeEmptyArray(token);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	815	return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	816
				817	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	818	//*start = '\0';
				819	return readString(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	820
				821	case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	822	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	823	if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	824	//start[1] = '\0'; // optional
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	825	// TODO(edisonn): pass here the length somehow?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	826	return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	827	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	828	return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	829	}
				830
				831	case kNamed_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	832	//*start = '\0';
				833	return readName(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	834
				835	// TODO(edisonn): what to do curly brackets? read spec!
				836	case kOpenedCurlyBracket_PdfDelimiter:
				837	default:
				838	break;
				839	}
				840
				841	SkASSERT(!isPdfWhiteSpace(*start));
				842	if (isPdfDelimiter(*start)) {
				843	// TODO(edisonn): how stream ] } > ) will be handled?
				844	// for now ignore, and it will become a keyword to be ignored
				845	}
				846	}
				847
				848	if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
				849	SkPdfObject::makeNull(token);
				850	return current;
				851	}
				852
				853	if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
				854	SkPdfObject::makeBoolean(true, token);
				855	return current;
				856	}
				857
				858	if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
				859	SkPdfObject::makeBoolean(false, token);
				860	return current;
				861	}
				862
				863	if (isPdfNumeric(*start)) {
				864	SkPdfObject::makeNumeric(start, current, token);
				865	} else {
				866	SkPdfObject::makeKeyword(start, current, token);
				867	}
				868	return current;
				869	}
				870
				871	SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	872	fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	873	return new SkPdfObject[BUFFER_SIZE];
				874	}
				875
				876	SkPdfAllocator::~SkPdfAllocator() {
				877	for (int i = 0 ; i < fHandles.count(); i++) {
				878	free(fHandles[i]);
				879	}
				880	for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	881	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				882	fHistory[i][j].reset();
				883	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	884	delete[] fHistory[i];
				885	}
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	886	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				887	fCurrent[j].reset();
				888	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	889	delete[] fCurrent;
				890	}
				891
				892	SkPdfObject* SkPdfAllocator::allocObject() {
				893	if (fCurrentUsed >= BUFFER_SIZE) {
				894	fHistory.push(fCurrent);
				895	fCurrent = allocBlock();
				896	fCurrentUsed = 0;
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	897	fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	898	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	899	fCurrentUsed++;
				900	return &fCurrent[fCurrentUsed - 1];
				901	}
				902
				903	// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	904	SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	905	const unsigned char* buffer = NULL;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	906	size_t len = 0;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	907	objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	908	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	909	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	910	if (endobj) {
				911	len = endobj - (char*)buffer + strlen("endobj");
				912	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	913	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	914	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	915	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	916
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	917	SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	918	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	919	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	920	if (endobj) {
				921	len = endobj - (char*)buffer + strlen("endobj");
				922	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	923	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	924	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	925	}
				926
				927	SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	928	}
				929
				930	bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
				931	token->fKeyword = NULL;
				932	token->fObject = NULL;
				933
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	934	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	935	if (fUncompressedStream >= fUncompressedStreamEnd) {
				936	return false;
				937	}
				938
				939	SkPdfObject obj;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	940	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	941
				942	// If it is a keyword, we will only get the pointer of the string
				943	if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
				944	token->fKeyword = obj.c_str();
edisonn@google.com	e878e72	2013-07-29 19:10:58 +0000	[diff] [blame]	945	token->fKeywordLength = obj.lenstr();
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	946	token->fType = kKeyword_TokenType;
				947	} else {
				948	SkPdfObject* pobj = fAllocator->allocObject();
				949	*pobj = obj;
				950	token->fObject = pobj;
				951	token->fType = kObject_TokenType;
				952	}
				953
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	954	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	955	static int read_op = 0;
				956	read_op++;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	957	if (548 == read_op) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	958	printf("break;\n");
				959	}
				960	printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				961	#endif
				962
				963	return true;
				964	}
				965
				966	void SkPdfNativeTokenizer::PutBack(PdfToken token) {
				967	SkASSERT(!fHasPutBack);
				968	fHasPutBack = true;
				969	fPutBack = token;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	970	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	971	printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
				972	#endif
				973	}
				974
				975	bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
				976	if (fHasPutBack) {
				977	*token = fPutBack;
				978	fHasPutBack = false;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	979	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	980	printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				981	#endif
				982	return true;
				983	}
				984
				985	if (fEmpty) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	986	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	987	printf("EMPTY TOKENIZER\n");
				988	#endif
				989	return false;
				990	}
				991
				992	return readTokenCore(token);
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	993	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	994
				995	#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
				996
				997	// keys
				998	DECLARE_PDF_NAME(BitsPerComponent);
				999	DECLARE_PDF_NAME(ColorSpace);
				1000	DECLARE_PDF_NAME(Decode);
				1001	DECLARE_PDF_NAME(DecodeParms);
				1002	DECLARE_PDF_NAME(Filter);
				1003	DECLARE_PDF_NAME(Height);
				1004	DECLARE_PDF_NAME(ImageMask);
				1005	DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
				1006	DECLARE_PDF_NAME(Interpolate);
				1007	DECLARE_PDF_NAME(Width);
				1008
				1009	// values
				1010	DECLARE_PDF_NAME(DeviceGray);
				1011	DECLARE_PDF_NAME(DeviceRGB);
				1012	DECLARE_PDF_NAME(DeviceCMYK);
				1013	DECLARE_PDF_NAME(Indexed);
				1014	DECLARE_PDF_NAME(ASCIIHexDecode);
				1015	DECLARE_PDF_NAME(ASCII85Decode);
				1016	DECLARE_PDF_NAME(LZWDecode);
				1017	DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
				1018	DECLARE_PDF_NAME(RunLengthDecode);
				1019	DECLARE_PDF_NAME(CCITTFaxDecode);
				1020	DECLARE_PDF_NAME(DCTDecode);
				1021
				1022	#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
				1023
				1024
				1025	static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
				1026	if (!key \|\| !key->isName()) {
				1027	return key;
				1028	}
				1029
				1030	// TODO(edisonn): use autogenerated code!
				1031	HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
				1032	HANDLE_NAME_ABBR(key, ColorSpace, CS);
				1033	HANDLE_NAME_ABBR(key, Decode, D);
				1034	HANDLE_NAME_ABBR(key, DecodeParms, DP);
				1035	HANDLE_NAME_ABBR(key, Filter, F);
				1036	HANDLE_NAME_ABBR(key, Height, H);
				1037	HANDLE_NAME_ABBR(key, ImageMask, IM);
				1038	// HANDLE_NAME_ABBR(key, Intent, );
				1039	HANDLE_NAME_ABBR(key, Interpolate, I);
				1040	HANDLE_NAME_ABBR(key, Width, W);
				1041
				1042	return key;
				1043	}
				1044
				1045	static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
				1046	if (!value \|\| !value->isName()) {
				1047	return value;
				1048	}
				1049
				1050	// TODO(edisonn): use autogenerated code!
				1051	HANDLE_NAME_ABBR(value, DeviceGray, G);
				1052	HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
				1053	HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
				1054	HANDLE_NAME_ABBR(value, Indexed, I);
				1055	HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
				1056	HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
				1057	HANDLE_NAME_ABBR(value, LZWDecode, LZW);
				1058	HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
				1059	HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
				1060	HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
				1061	HANDLE_NAME_ABBR(value, DCTDecode, DCT);
				1062
				1063	return value;
				1064	}
				1065
				1066	SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
				1067	// BI already processed
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1068	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1069	if (fUncompressedStream >= fUncompressedStreamEnd) {
				1070	return NULL;
				1071	}
				1072
				1073	SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
				1074	SkPdfObject::makeEmptyDictionary(inlineImage);
				1075
				1076	while (fUncompressedStream < fUncompressedStreamEnd) {
				1077	SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1078	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1079
edisonn@google.com	e878e72	2013-07-29 19:10:58 +0000	[diff] [blame]	1080	if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1081	fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1082	return inlineImage;
				1083	} else {
				1084	SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1085	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1086	// TODO(edisonn): perf maybe we should not expand abreviation like this
				1087	inlineImage->set(inlineImageKeyAbbreviationExpand(key),
				1088	inlineImageValueAbbreviationExpand(obj));
				1089	}
				1090	}
				1091	// TODO(edisonn): report end of data with inline image without an EI
				1092	return inlineImage;
				1093	}