Blame - experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - platform/external/skia

blob: a8881382b4acf58e6e03ae59019d3b00f3e37a28 [file] [log] [blame]

edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	1
				2	#include "SkPdfNativeTokenizer.h"
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	3	#include "SkPdfObject.h"
				4	#include "SkPdfConfig.h"
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	5
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	6	#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	7	#include "SkPdfImageDictionary_autogen.h"
				8
				9	// TODO(edisonn): perf!!!
				10	// there could be 0s between start and end! but not in the needle.
				11	static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
				12	int needleLen = strlen(needle);
				13	if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				14	strncmp(hayStart, needle, needleLen) == 0) {
				15	return hayStart;
				16	}
				17
				18	hayStart++;
				19
				20	while (hayStart < hayEnd) {
				21	if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
				22	(isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) \|\| (hayStart+needleLen == hayEnd)) &&
				23	strncmp(hayStart, needle, needleLen) == 0) {
				24	return hayStart;
				25	}
				26	hayStart++;
				27	}
				28	return NULL;
				29	}
				30
#ifdef PDF_TRACE_TOKENIZER
// Starts a new trace line: prints a running call counter and the token type,
// followed by one space per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
#if 0
    // Debugging hook: put a breakpoint here to stop at a specific trace id.
    if (478613 == id) {
        printf("break;\n");
    }
#endif
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    for (int depth = level; depth > 0; depth--) {
        printf(" ");
    }
}

// Echoes one character consumed while skipping white space or a comment.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Echoes one character of the token being scanned.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Prints the bytes in [start, end) one by one, then a newline.
static void tracePrintRange(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cur = start; cur < end; ++cur) {
        printf("%c", *cur);
    }
    printf("\n");
}

// Dumps a decoded name.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    tracePrintRange(start, end);
}

// Dumps a decoded literal string.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    tracePrintRange(start, end);
}

// Dumps a decoded hex string.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    tracePrintRange(start, end);
}

#else
// Tracing disabled: every trace call expands to nothing.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
				87
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame]	88	const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	89	TRACE_INDENT(level, "White Space");
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame]	90	while (start < end && (isPdfWhiteSpace(start) \|\| start == kComment_PdfDelimiter)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	91	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	92	if (*start == kComment_PdfDelimiter) {
				93	// skip the comment until end of line
				94	while (start < end && !isPdfEOL(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	95	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	96	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	97	TRACE_COMMENT(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	98	}
				99	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	100	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	101	start++;
				102	}
				103	}
				104	return start;
				105	}
				106
				107	// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame]	108	const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	109	//int opened brackets
				110	//TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	111	TRACE_INDENT(level, "Token");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	112
				113	SkASSERT(!isPdfWhiteSpace(*start));
				114
				115	if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	116	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	117	start++;
				118	return start;
				119	}
				120
				121	while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	122	TRACE_TK(*start);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	123	start++;
				124	}
				125	return start;
				126	}
				127
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	128	// last elem has to be ]
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	129	static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame]	130	if (allocator == NULL) {
				131	// TODO(edisonn): report/warning error
				132	return end;
				133	}
				134
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	135	TRACE_INDENT(level, "Array");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	136	while (start < end) {
				137	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	138	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	139
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	140	const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	141
				142	if (endOfToken == start) {
				143	// TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
				144	return start;
				145	}
				146
				147	if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
				148	return endOfToken;
				149	}
				150
				151	SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	152	start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	153	// TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
				154	// we are sure they are not references!
				155	if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
				156	SkPdfObject* gen = array->removeLastInArray();
				157	SkPdfObject* id = array->removeLastInArray();
				158	newObj->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	159	SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	160	}
				161	array->appendInArray(newObj);
				162	}
				163	// TODO(edisonn): report not reached, we should never get here
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	164	// TODO(edisonn): there might be a bug here, enable an assert and run it on files
				165	// or it might be that the files were actually corrupted
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	166	return start;
				167	}
				168
				169	// When we read strings we will rewrite the string so we will reuse the memory
				170	// when we start to read the string, we already consumed the opened bracket
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	171
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	172	// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer or can reuse the one we have
				173
				174	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				175	TRACE_INDENT(level, "String");
				176	const unsigned char* in = start;
				177	bool hasOut = (out != NULL);
				178
				179	int openRoundBrackets = 1;
				180	while (in < end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	181	openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
				182	openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	183	if (openRoundBrackets == 0) {
				184	in++; // consumed )
				185	break;
				186	}
				187
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	188	if (*in == kEscape_PdfSpecial) {
				189	if (in + 1 < end) {
				190	switch (in[1]) {
				191	case 'n':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	192	if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	193	out++;
				194	in += 2;
				195	break;
				196
				197	case 'r':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	198	if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	199	out++;
				200	in += 2;
				201	break;
				202
				203	case 't':
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	204	if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	205	out++;
				206	in += 2;
				207	break;
				208
				209	case 'b':
				210	// TODO(edisonn): any special meaning to backspace?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	211	if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	212	out++;
				213	in += 2;
				214	break;
				215
				216	case 'f':
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame]	217	if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	218	out++;
				219	in += 2;
				220	break;
				221
				222	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	223	if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	224	out++;
				225	in += 2;
				226	break;
				227
				228	case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	229	if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	230	out++;
				231	in += 2;
				232	break;
				233
				234	case kEscape_PdfSpecial:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	235	if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	236	out++;
				237	in += 2;
				238	break;
				239
				240	case '0':
				241	case '1':
				242	case '2':
				243	case '3':
				244	case '4':
				245	case '5':
				246	case '6':
				247	case '7': {
				248	//read octals
				249	in++; // consume backslash
				250
				251	int code = 0;
				252	int i = 0;
				253	while (in < end && in >= '0' && in < '8') {
				254	code = (code << 3) + ((in) - '0'); // code 8 + d
				255	i++;
				256	in++;
				257	if (i == 3) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	258	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	259	out++;
				260	i = 0;
				261	}
				262	}
				263	if (i > 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	264	if (hasOut) { *out = code & 0xff; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	265	out++;
				266	}
				267	}
				268	break;
				269
				270	default:
				271	// Per spec, backslash is ignored is escaped ch is unknown
				272	in++;
				273	break;
				274	}
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	275	} else {
				276	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	277	}
				278	} else {
				279	// TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
				280	// we could have one look that first just inc current, and when we find the backslash
				281	// we go to this loop
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	282	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	283	in++;
				284	out++;
				285	}
				286	}
				287
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	288	if (hasOut) {
				289	return in; // consumed already ) at the end of the string
				290	} else {
				291	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				292	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	293	}
				294
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	295	static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
				296	return readString(level, start, end, NULL) - start;
				297	}
				298
				299	static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	300	if (!allocator) {
				301	return end;
				302	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	303	int outLength = readStringLength(level, start, end);
				304	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				305	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				306	start = readString(level, start, end, out);
				307	SkPdfObject::makeString(out, out + outLength, str);
				308	TRACE_STRING(out, out + outLength);
				309	return start; // consumed already ) at the end of the string
				310	}
				311
				312	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				313	TRACE_INDENT(level, "HexString");
				314	bool hasOut = (out != NULL);
				315	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	316
				317	unsigned char code = 0;
				318
				319	while (in < end) {
				320	while (in < end && isPdfWhiteSpace(*in)) {
				321	in++;
				322	}
				323
				324	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	325	//*in = '\0';
				326	in++; // consume >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	327	// normal exit
				328	break;
				329	}
				330
				331	if (in >= end) {
				332	// end too soon
				333	break;
				334	}
				335
				336	switch (*in) {
				337	case '0':
				338	case '1':
				339	case '2':
				340	case '3':
				341	case '4':
				342	case '5':
				343	case '6':
				344	case '7':
				345	case '8':
				346	case '9':
				347	code = (*in - '0') << 4;
				348	break;
				349
				350	case 'a':
				351	case 'b':
				352	case 'c':
				353	case 'd':
				354	case 'e':
				355	case 'f':
				356	code = (*in - 'a' + 10) << 4;
				357	break;
				358
				359	case 'A':
				360	case 'B':
				361	case 'C':
				362	case 'D':
				363	case 'E':
				364	case 'F':
				365	code = (*in - 'A' + 10) << 4;
				366	break;
				367
				368	// TODO(edisonn): spec does not say how to handle this error
				369	default:
				370	break;
				371	}
				372
				373	in++; // advance
				374
				375	while (in < end && isPdfWhiteSpace(*in)) {
				376	in++;
				377	}
				378
				379	// TODO(edisonn): report error
				380	if (in >= end) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	381	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	382	out++;
				383	break;
				384	}
				385
				386	if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	387	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	388	out++;
edisonn@google.com	1acab36	2013-07-25 22:03:22 +0000	[diff] [blame]	389	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	390	break;
				391	}
				392
				393	switch (*in) {
				394	case '0':
				395	case '1':
				396	case '2':
				397	case '3':
				398	case '4':
				399	case '5':
				400	case '6':
				401	case '7':
				402	case '8':
				403	case '9':
				404	code += (*in - '0');
				405	break;
				406
				407	case 'a':
				408	case 'b':
				409	case 'c':
				410	case 'd':
				411	case 'e':
				412	case 'f':
				413	code += (*in - 'a' + 10);
				414	break;
				415
				416	case 'A':
				417	case 'B':
				418	case 'C':
				419	case 'D':
				420	case 'E':
				421	case 'F':
				422	code += (*in - 'A' + 10);
				423	break;
				424
				425	// TODO(edisonn): spec does not say how to handle this error
				426	default:
				427	break;
				428	}
				429
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	430	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	431	out++;
				432	in++;
				433	}
				434
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	435	if (hasOut) {
				436	return in; // consumed already > at the end of the string
				437	} else {
				438	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	439	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	440	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	441
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	442	static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
				443	return readHexString(level, start, end, NULL) - start;
				444	}
				445
				446	static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	447	if (!allocator) {
				448	return end;
				449	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	450	int outLength = readHexStringLength(level, start, end);
				451	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				452	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				453	start = readHexString(level, start, end, out);
				454	SkPdfObject::makeHexString(out, out + outLength, str);
				455	TRACE_HEXSTRING(out, out + outLength);
				456	return start; // consumed already > at the end of the string
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	457	}
				458
				459	// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	460	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
				461	TRACE_INDENT(level, "Name");
				462	bool hasOut = (out != NULL);
				463	const unsigned char* in = start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	464
				465	unsigned char code = 0;
				466
				467	while (in < end) {
				468	if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
				469	break;
				470	}
				471
				472	if (*in == '#' && in + 2 < end) {
				473	in++;
				474	switch (*in) {
				475	case '0':
				476	case '1':
				477	case '2':
				478	case '3':
				479	case '4':
				480	case '5':
				481	case '6':
				482	case '7':
				483	case '8':
				484	case '9':
				485	code = (*in - '0') << 4;
				486	break;
				487
				488	case 'a':
				489	case 'b':
				490	case 'c':
				491	case 'd':
				492	case 'e':
				493	case 'f':
				494	code = (*in - 'a' + 10) << 4;
				495	break;
				496
				497	case 'A':
				498	case 'B':
				499	case 'C':
				500	case 'D':
				501	case 'E':
				502	case 'F':
				503	code = (*in - 'A' + 10) << 4;
				504	break;
				505
				506	// TODO(edisonn): spec does not say how to handle this error
				507	default:
				508	break;
				509	}
				510
				511	in++; // advance
				512
				513	switch (*in) {
				514	case '0':
				515	case '1':
				516	case '2':
				517	case '3':
				518	case '4':
				519	case '5':
				520	case '6':
				521	case '7':
				522	case '8':
				523	case '9':
				524	code += (*in - '0');
				525	break;
				526
				527	case 'a':
				528	case 'b':
				529	case 'c':
				530	case 'd':
				531	case 'e':
				532	case 'f':
				533	code += (*in - 'a' + 10);
				534	break;
				535
				536	case 'A':
				537	case 'B':
				538	case 'C':
				539	case 'D':
				540	case 'E':
				541	case 'F':
				542	code += (*in - 'A' + 10);
				543	break;
				544
				545	// TODO(edisonn): spec does not say how to handle this error
				546	default:
				547	break;
				548	}
				549
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	550	if (hasOut) { *out = code; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	551	out++;
				552	in++;
				553	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	554	if (hasOut) { out = in; }
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	555	out++;
				556	in++;
				557	}
				558	}
				559
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	560	if (hasOut) {
				561	return in;
				562	} else {
				563	return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
				564	}
				565	}
				566
				567	static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
				568	return readName(level, start, end, NULL) - start;
				569	}
				570
				571	static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.com	b44334c	2013-07-23 20:47:05 +0000	[diff] [blame]	572	if (!allocator) {
				573	return end;
				574	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	575	int outLength = readNameLength(level, start, end);
				576	// TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
				577	unsigned char* out = (unsigned char*)allocator->alloc(outLength);
				578	start = readName(level, start, end, out);
				579	SkPdfObject::makeName(out, out + outLength, name);
				580	TRACE_NAME(out, out + outLength);
				581	return start;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	582	}
				583
				584	// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
				585	// that makes for an interesting scenario, where the stream itself contains endstream, together
				586	// with a reference object with the length, but the real length object would be somewhere else
				587	// it could confuse the parser
				588	/*example:
				589
				590	7 0 obj
				591	<< /length 8 0 R>>
				592	stream
				593	...............
				594	endstream
				595	8 0 obj #we are in stream actually, not a real object
				596	<< 10 >> #we are in stream actually, not a real object
				597	endobj
				598	endstream
				599	8 0 obj #real obj
				600	<< 100 >> #real obj
				601	endobj
				602	and it could get worse, with multiple object like this
				603	*/
				604
				605	// right now implement the silly algorithm that assumes endstream is finishing the stream
				606
				607
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	608	static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
				609	TRACE_INDENT(level, "Stream");
				610	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	611	if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
				612	// no stream. return.
				613	return start;
				614	}
				615
				616	start += 6; // strlen("stream")
				617	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				618	start += 2;
				619	} else if (start[0] == kLF_PdfWhiteSpace) {
				620	start += 1;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	621	} else if (isPdfWhiteSpace(start[0])) {
				622	start += 1;
				623	} else {
				624	// TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
				625	// TODO(edisonn): warning?
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	626	}
				627
				628	SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
				629	// TODO(edisonn): load Length
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	630	int64_t length = -1;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	631
				632	// TODO(edisonn): very basic implementation
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	633	if (stream->has_Length() && stream->Length(doc) > 0) {
				634	length = stream->Length(doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	635	}
				636
				637	// TODO(edisonn): laod external streams
				638	// TODO(edisonn): look at the last filter, to determione how to deal with possible issue
				639
edisonn@google.com	4ef4bed	2013-07-29 22:14:45 +0000	[diff] [blame]	640
				641	if (length >= 0) {
				642	const unsigned char* endstream = start + length;
				643
				644	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				645	endstream += 2;
				646	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				647	endstream += 1;
				648	}
				649
				650	if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
				651	length = -1;
				652	}
				653	}
				654
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	655	if (length < 0) {
				656	// scan the buffer, until we find first endstream
				657	// TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	658	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "endstream");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	659
				660	if (endstream) {
				661	length = endstream - start;
				662	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	663	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	664	}
				665	}
				666	if (length >= 0) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	667	const unsigned char* endstream = start + length;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	668
				669	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				670	endstream += 2;
				671	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				672	endstream += 1;
				673	}
				674
				675	// TODO(edisonn): verify the next bytes are "endstream"
				676
				677	endstream += strlen("endstream");
				678	// TODO(edisonn): Assert? report error/warning?
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	679	dict->addStream(start, (size_t)length);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	680	return endstream;
				681	}
				682	return start;
				683	}
				684
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	685	static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
				686	TRACE_INDENT(level, "Inline Image");
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	687	// We already processed ID keyword, and we should be positioned immediately after it
				688
				689	// TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
				690	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				691	start += 2;
				692	} else if (start[0] == kLF_PdfWhiteSpace) {
				693	start += 1;
				694	} else if (isPdfWhiteSpace(start[0])) {
				695	start += 1;
				696	} else {
				697	SkASSERT(isPdfDelimiter(start[0]));
				698	// TODO(edisonn): warning?
				699	}
				700
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	701	const unsigned char* endstream = (const unsigned char)strrstrk((char)start, (char*)end, "EI");
				702	const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	703
				704	if (endstream) {
				705	int length = endstream - start;
				706	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
				707	if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
				708	inlineImage->addStream(start, (size_t)length);
				709	} else {
				710	// TODO(edisonn): report error in inline image stream (ID-EI) section
				711	// TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
				712	return end;
				713	}
				714	return endEI;
				715	}
				716
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	717	static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	1f08016	2013-07-23 21:05:49 +0000	[diff] [blame]	718	if (allocator == NULL) {
				719	// TODO(edisonn): report/warning error
				720	return end;
				721	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	722	TRACE_INDENT(level, "Dictionary");
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	723	SkPdfObject::makeEmptyDictionary(dict);
				724
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	725	start = skipPdfWhiteSpaces(level, start, end);
				726	SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	727
				728	while (start < end && *start == kNamed_PdfDelimiter) {
				729	SkPdfObject key;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	730	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	731	start++;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	732	start = readName(level + 1, start, end, &key, &tmpStorage);
				733	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	734
				735	if (start < end) {
				736	SkPdfObject* value = allocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	737	start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	738
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	739	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	740
				741	if (start < end) {
				742	// seems we have an indirect reference
				743	if (isPdfDigit(*start)) {
				744	SkPdfObject generation;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	745	start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	746
				747	SkPdfObject keywordR;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	748	start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	749
				750	if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
				751	int64_t id = value->intValue();
				752	value->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	753	SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	754	dict->set(&key, value);
				755	} else {
				756	// error, ignore
				757	dict->set(&key, value);
				758	}
				759	} else {
				760	// next elem is not a digit, but it might not be / either!
				761	dict->set(&key, value);
				762	}
				763	} else {
				764	// /key >>
				765	dict->set(&key, value);
				766	return end;
				767	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	768	start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	769	} else {
				770	dict->set(&key, &SkPdfObject::kNull);
				771	return end;
				772	}
				773	}
				774
				775	// TODO(edisonn): options to ignore these errors
				776
				777	// now we should expect >>
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	778	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	779	if (*start != kClosedInequityBracket_PdfDelimiter) {
				780	// TODO(edisonn): report/warning
				781	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	782	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	783	start++; // skip >
				784	if (*start != kClosedInequityBracket_PdfDelimiter) {
				785	// TODO(edisonn): report/warning
				786	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	787	//*start = '\0';
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	788	start++; // skip >
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	789
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	790	start = readStream(level, start, end, dict, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	791
				792	return start;
				793	}
				794
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	795	const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
				796	const unsigned char* current;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	797
				798	// skip white spaces
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	799	start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	800
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	801	current = endOfPdfToken(level, start, end);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	802
				803	// no token, len would be 0
				804	if (current == start) {
				805	return NULL;
				806	}
				807
				808	int tokenLen = current - start;
				809
				810	if (tokenLen == 1) {
				811	// start array
				812	switch (*start) {
				813	case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	814	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	815	SkPdfObject::makeEmptyArray(token);
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	816	return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	817
				818	case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	819	//*start = '\0';
				820	return readString(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	821
				822	case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	823	//*start = '\0';
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	824	if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	825	//start[1] = '\0'; // optional
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	826	// TODO(edisonn): pass here the length somehow?
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	827	return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	828	} else {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	829	return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	830	}
				831
				832	case kNamed_PdfDelimiter:
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	833	//*start = '\0';
				834	return readName(level, start + 1, end, token, allocator);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	835
				836	// TODO(edisonn): what to do curly brackets? read spec!
				837	case kOpenedCurlyBracket_PdfDelimiter:
				838	default:
				839	break;
				840	}
				841
				842	SkASSERT(!isPdfWhiteSpace(*start));
				843	if (isPdfDelimiter(*start)) {
				844	// TODO(edisonn): how stream ] } > ) will be handled?
				845	// for now ignore, and it will become a keyword to be ignored
				846	}
				847	}
				848
				849	if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
				850	SkPdfObject::makeNull(token);
				851	return current;
				852	}
				853
				854	if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
				855	SkPdfObject::makeBoolean(true, token);
				856	return current;
				857	}
				858
edisonn@google.com	f111a4b	2013-07-31 18:22:36 +0000	[diff] [blame]	859	if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	860	SkPdfObject::makeBoolean(false, token);
				861	return current;
				862	}
				863
				864	if (isPdfNumeric(*start)) {
				865	SkPdfObject::makeNumeric(start, current, token);
				866	} else {
				867	SkPdfObject::makeKeyword(start, current, token);
				868	}
				869	return current;
				870	}
				871
				872	SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	873	fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	874	return new SkPdfObject[BUFFER_SIZE];
				875	}
				876
				877	SkPdfAllocator::~SkPdfAllocator() {
				878	for (int i = 0 ; i < fHandles.count(); i++) {
				879	free(fHandles[i]);
				880	}
				881	for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	882	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				883	fHistory[i][j].reset();
				884	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	885	delete[] fHistory[i];
				886	}
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	887	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				888	fCurrent[j].reset();
				889	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	890	delete[] fCurrent;
				891	}
				892
				893	SkPdfObject* SkPdfAllocator::allocObject() {
				894	if (fCurrentUsed >= BUFFER_SIZE) {
				895	fHistory.push(fCurrent);
				896	fCurrent = allocBlock();
				897	fCurrentUsed = 0;
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	898	fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	899	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	900	fCurrentUsed++;
				901	return &fCurrent[fCurrentUsed - 1];
				902	}
				903
				904	// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	905	SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	906	const unsigned char* buffer = NULL;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	907	size_t len = 0;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	908	objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	909	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	910	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	911	if (endobj) {
				912	len = endobj - (char*)buffer + strlen("endobj");
				913	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	914	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	915	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	916	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	917
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	918	SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	919	// TODO(edisonn): hack, find end of object
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	920	char* endobj = strrstrk((char)buffer, (char)buffer + len, "endobj");
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	921	if (endobj) {
				922	len = endobj - (char*)buffer + strlen("endobj");
				923	}
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	924	fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	925	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	926	}
				927
				928	SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	929	}
				930
				931	bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com	91ce698	2013-08-05 20:45:40 +0000	[diff] [blame^]	932	SkPdfObject obj;
				933	#ifdef PDF_TRACE_READ_TOKEN
				934	static int read_op = 0;
				935	int last;
				936	#endif
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	937	token->fKeyword = NULL;
				938	token->fObject = NULL;
				939
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	940	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	941	if (fUncompressedStream >= fUncompressedStreamEnd) {
				942	return false;
				943	}
				944
edisonn@google.com	91ce698	2013-08-05 20:45:40 +0000	[diff] [blame^]	945	#ifdef PDF_TRACE_READ_TOKEN
				946	printf("BEFORE the read: %i\n", read_op);
				947	last = read_op;
				948	#endif
				949
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	950	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com	91ce698	2013-08-05 20:45:40 +0000	[diff] [blame^]	951	#ifdef PDF_TRACE_READ_TOKEN
				952	printf("BEFORE the read: %i\n", read_op);
				953	if (last != read_op) {
				954	printf("break; // memory override");
				955	}
				956	#endif
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	957
				958	// If it is a keyword, we will only get the pointer of the string
				959	if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
				960	token->fKeyword = obj.c_str();
edisonn@google.com	e878e72	2013-07-29 19:10:58 +0000	[diff] [blame]	961	token->fKeywordLength = obj.lenstr();
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	962	token->fType = kKeyword_TokenType;
				963	} else {
				964	SkPdfObject* pobj = fAllocator->allocObject();
				965	*pobj = obj;
				966	token->fObject = pobj;
				967	token->fType = kObject_TokenType;
				968	}
				969
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	970	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	971	read_op++;
edisonn@google.com	b0145ce	2013-08-05 16:23:23 +0000	[diff] [blame]	972	#if 0
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	973	if (548 == read_op) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	974	printf("break;\n");
				975	}
edisonn@google.com	b0145ce	2013-08-05 16:23:23 +0000	[diff] [blame]	976	#endif
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	977	printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				978	#endif
				979
				980	return true;
				981	}
				982
				983	void SkPdfNativeTokenizer::PutBack(PdfToken token) {
				984	SkASSERT(!fHasPutBack);
				985	fHasPutBack = true;
				986	fPutBack = token;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	987	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	988	printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
				989	#endif
				990	}
				991
				992	bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
				993	if (fHasPutBack) {
				994	*token = fPutBack;
				995	fHasPutBack = false;
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	996	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	997	printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				998	#endif
				999	return true;
				1000	}
				1001
				1002	if (fEmpty) {
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1003	#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	1004	printf("EMPTY TOKENIZER\n");
				1005	#endif
				1006	return false;
				1007	}
				1008
				1009	return readTokenCore(token);
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	1010	}
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1011
				1012	#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
				1013
				1014	// keys
				1015	DECLARE_PDF_NAME(BitsPerComponent);
				1016	DECLARE_PDF_NAME(ColorSpace);
				1017	DECLARE_PDF_NAME(Decode);
				1018	DECLARE_PDF_NAME(DecodeParms);
				1019	DECLARE_PDF_NAME(Filter);
				1020	DECLARE_PDF_NAME(Height);
				1021	DECLARE_PDF_NAME(ImageMask);
				1022	DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
				1023	DECLARE_PDF_NAME(Interpolate);
				1024	DECLARE_PDF_NAME(Width);
				1025
				1026	// values
				1027	DECLARE_PDF_NAME(DeviceGray);
				1028	DECLARE_PDF_NAME(DeviceRGB);
				1029	DECLARE_PDF_NAME(DeviceCMYK);
				1030	DECLARE_PDF_NAME(Indexed);
				1031	DECLARE_PDF_NAME(ASCIIHexDecode);
				1032	DECLARE_PDF_NAME(ASCII85Decode);
				1033	DECLARE_PDF_NAME(LZWDecode);
				1034	DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
				1035	DECLARE_PDF_NAME(RunLengthDecode);
				1036	DECLARE_PDF_NAME(CCITTFaxDecode);
				1037	DECLARE_PDF_NAME(DCTDecode);
				1038
				1039	#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
				1040
				1041
				1042	static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
				1043	if (!key \|\| !key->isName()) {
				1044	return key;
				1045	}
				1046
				1047	// TODO(edisonn): use autogenerated code!
				1048	HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
				1049	HANDLE_NAME_ABBR(key, ColorSpace, CS);
				1050	HANDLE_NAME_ABBR(key, Decode, D);
				1051	HANDLE_NAME_ABBR(key, DecodeParms, DP);
				1052	HANDLE_NAME_ABBR(key, Filter, F);
				1053	HANDLE_NAME_ABBR(key, Height, H);
				1054	HANDLE_NAME_ABBR(key, ImageMask, IM);
				1055	// HANDLE_NAME_ABBR(key, Intent, );
				1056	HANDLE_NAME_ABBR(key, Interpolate, I);
				1057	HANDLE_NAME_ABBR(key, Width, W);
				1058
				1059	return key;
				1060	}
				1061
				1062	static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
				1063	if (!value \|\| !value->isName()) {
				1064	return value;
				1065	}
				1066
				1067	// TODO(edisonn): use autogenerated code!
				1068	HANDLE_NAME_ABBR(value, DeviceGray, G);
				1069	HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
				1070	HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
				1071	HANDLE_NAME_ABBR(value, Indexed, I);
				1072	HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
				1073	HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
				1074	HANDLE_NAME_ABBR(value, LZWDecode, LZW);
				1075	HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
				1076	HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
				1077	HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
				1078	HANDLE_NAME_ABBR(value, DCTDecode, DCT);
				1079
				1080	return value;
				1081	}
				1082
				1083	SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
				1084	// BI already processed
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1085	fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1086	if (fUncompressedStream >= fUncompressedStreamEnd) {
				1087	return NULL;
				1088	}
				1089
				1090	SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
				1091	SkPdfObject::makeEmptyDictionary(inlineImage);
				1092
				1093	while (fUncompressedStream < fUncompressedStreamEnd) {
				1094	SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1095	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1096
edisonn@google.com	e878e72	2013-07-29 19:10:58 +0000	[diff] [blame]	1097	if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1098	fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1099	return inlineImage;
				1100	} else {
				1101	SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com	2ccc3af	2013-07-23 17:43:18 +0000	[diff] [blame]	1102	fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com	78b38b1	2013-07-15 18:20:58 +0000	[diff] [blame]	1103	// TODO(edisonn): perf maybe we should not expand abreviation like this
				1104	inlineImage->set(inlineImageKeyAbbreviationExpand(key),
				1105	inlineImageValueAbbreviationExpand(obj));
				1106	}
				1107	}
				1108	// TODO(edisonn): report end of data with inline image without an EI
				1109	return inlineImage;
				1110	}