Blame - experimental/PdfViewer/pdfparser/native/SkPdfNativeTokenizer.cpp - platform/external/skia

blob: de49e35f11039428987ecb573986329f61c243e3 [file] [log] [blame]

edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	1
				2	#include "SkPdfNativeTokenizer.h"
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	3	#include "SkPdfObject.h"
				4	#include "SkPdfConfig.h"
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	5
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	6	#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	7
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	8	static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	9	while (start < end && isPdfWhiteSpace(*start)) {
				10	if (*start == kComment_PdfDelimiter) {
				11	// skip the comment until end of line
				12	while (start < end && !isPdfEOL(*start)) {
				13	*start = '\0';
				14	start++;
				15	}
				16	} else {
				17	*start = '\0';
				18	start++;
				19	}
				20	}
				21	return start;
				22	}
				23
				24	// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	25	static unsigned char* endOfPdfToken(unsigned char* start, unsigned char* end) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	26	//int opened brackets
				27	//TODO(edisonn): what out for special chars, like \n, \032
				28
				29	SkASSERT(!isPdfWhiteSpace(*start));
				30
				31	if (start < end && isPdfDelimiter(*start)) {
				32	start++;
				33	return start;
				34	}
				35
				36	while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
				37	start++;
				38	}
				39	return start;
				40	}
				41
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	42	// last elem has to be ]
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	43	static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	44	while (start < end) {
				45	// skip white spaces
				46	start = skipPdfWhiteSpaces(start, end);
				47
				48	unsigned char* endOfToken = endOfPdfToken(start, end);
				49
				50	if (endOfToken == start) {
				51	// TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
				52	return start;
				53	}
				54
				55	if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
				56	return endOfToken;
				57	}
				58
				59	SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	60	start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	61	// TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
				62	// we are sure they are not references!
				63	if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
				64	SkPdfObject* gen = array->removeLastInArray();
				65	SkPdfObject* id = array->removeLastInArray();
				66	newObj->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	67	SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	68	}
				69	array->appendInArray(newObj);
				70	}
				71	// TODO(edisonn): report not reached, we should never get here
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	72	// TODO(edisonn): there might be a bug here, enable an assert and run it on files
				73	// or it might be that the files were actually corrupted
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	74	return start;
				75	}
				76
				77	// When we read strings we will rewrite the string so we will reuse the memory
				78	// when we start to read the string, we already consumed the opened bracket
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	79	static unsigned char* readString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	80	unsigned char* out = start;
				81	unsigned char* in = start;
				82
				83	int openRoundBrackets = 0;
				84	while (in < end && (*in != kClosedRoundBracket_PdfDelimiter \|\| openRoundBrackets > 0)) {
				85	openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
				86	openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
				87	if (*in == kEscape_PdfSpecial) {
				88	if (in + 1 < end) {
				89	switch (in[1]) {
				90	case 'n':
				91	*out = kLF_PdfWhiteSpace;
				92	out++;
				93	in += 2;
				94	break;
				95
				96	case 'r':
				97	*out = kCR_PdfWhiteSpace;
				98	out++;
				99	in += 2;
				100	break;
				101
				102	case 't':
				103	*out = kHT_PdfWhiteSpace;
				104	out++;
				105	in += 2;
				106	break;
				107
				108	case 'b':
				109	// TODO(edisonn): any special meaning to backspace?
				110	*out = kBackspace_PdfSpecial;
				111	out++;
				112	in += 2;
				113	break;
				114
				115	case 'f':
				116	*out = kFF_PdfWhiteSpace;
				117	out++;
				118	in += 2;
				119	break;
				120
				121	case kOpenedRoundBracket_PdfDelimiter:
				122	*out = kOpenedRoundBracket_PdfDelimiter;
				123	out++;
				124	in += 2;
				125	break;
				126
				127	case kClosedRoundBracket_PdfDelimiter:
				128	*out = kClosedRoundBracket_PdfDelimiter;
				129	out++;
				130	in += 2;
				131	break;
				132
				133	case kEscape_PdfSpecial:
				134	*out = kEscape_PdfSpecial;
				135	out++;
				136	in += 2;
				137	break;
				138
				139	case '0':
				140	case '1':
				141	case '2':
				142	case '3':
				143	case '4':
				144	case '5':
				145	case '6':
				146	case '7': {
				147	//read octals
				148	in++; // consume backslash
				149
				150	int code = 0;
				151	int i = 0;
				152	while (in < end && in >= '0' && in < '8') {
				153	code = (code << 3) + ((in) - '0'); // code 8 + d
				154	i++;
				155	in++;
				156	if (i == 3) {
				157	*out = code & 0xff;
				158	out++;
				159	i = 0;
				160	}
				161	}
				162	if (i > 0) {
				163	*out = code & 0xff;
				164	out++;
				165	}
				166	}
				167	break;
				168
				169	default:
				170	// Per spec, backslash is ignored is escaped ch is unknown
				171	in++;
				172	break;
				173	}
edisonn@google.com	8bad737	2013-07-10 23:36:56 +0000	[diff] [blame]	174	} else {
				175	in++;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	176	}
				177	} else {
				178	// TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
				179	// we could have one look that first just inc current, and when we find the backslash
				180	// we go to this loop
				181	in = out;
				182	in++;
				183	out++;
				184	}
				185	}
				186
				187
				188	SkPdfObject::makeString(start, out, str);
				189	return in + 1; // consume ) at the end of the string
				190	}
				191
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	192	static unsigned char* readHexString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	193	unsigned char* out = start;
				194	unsigned char* in = start;
				195
				196	unsigned char code = 0;
				197
				198	while (in < end) {
				199	while (in < end && isPdfWhiteSpace(*in)) {
				200	in++;
				201	}
				202
				203	if (*in == kClosedInequityBracket_PdfDelimiter) {
				204	*in = '\0';
				205	in++;
				206	// normal exit
				207	break;
				208	}
				209
				210	if (in >= end) {
				211	// end too soon
				212	break;
				213	}
				214
				215	switch (*in) {
				216	case '0':
				217	case '1':
				218	case '2':
				219	case '3':
				220	case '4':
				221	case '5':
				222	case '6':
				223	case '7':
				224	case '8':
				225	case '9':
				226	code = (*in - '0') << 4;
				227	break;
				228
				229	case 'a':
				230	case 'b':
				231	case 'c':
				232	case 'd':
				233	case 'e':
				234	case 'f':
				235	code = (*in - 'a' + 10) << 4;
				236	break;
				237
				238	case 'A':
				239	case 'B':
				240	case 'C':
				241	case 'D':
				242	case 'E':
				243	case 'F':
				244	code = (*in - 'A' + 10) << 4;
				245	break;
				246
				247	// TODO(edisonn): spec does not say how to handle this error
				248	default:
				249	break;
				250	}
				251
				252	in++; // advance
				253
				254	while (in < end && isPdfWhiteSpace(*in)) {
				255	in++;
				256	}
				257
				258	// TODO(edisonn): report error
				259	if (in >= end) {
				260	*out = code;
				261	out++;
				262	break;
				263	}
				264
				265	if (*in == kClosedInequityBracket_PdfDelimiter) {
				266	*out = code;
				267	out++;
				268	break;
				269	}
				270
				271	switch (*in) {
				272	case '0':
				273	case '1':
				274	case '2':
				275	case '3':
				276	case '4':
				277	case '5':
				278	case '6':
				279	case '7':
				280	case '8':
				281	case '9':
				282	code += (*in - '0');
				283	break;
				284
				285	case 'a':
				286	case 'b':
				287	case 'c':
				288	case 'd':
				289	case 'e':
				290	case 'f':
				291	code += (*in - 'a' + 10);
				292	break;
				293
				294	case 'A':
				295	case 'B':
				296	case 'C':
				297	case 'D':
				298	case 'E':
				299	case 'F':
				300	code += (*in - 'A' + 10);
				301	break;
				302
				303	// TODO(edisonn): spec does not say how to handle this error
				304	default:
				305	break;
				306	}
				307
				308	*out = code;
				309	out++;
				310	in++;
				311	}
				312
				313	if (out < in) {
				314	*out = '\0';
				315	}
				316
				317	SkPdfObject::makeHexString(start, out, str);
				318	return in; // consume > at the end of the string
				319	}
				320
				321	// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	322	static unsigned char* readName(unsigned char* start, unsigned char* end, SkPdfObject* name) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	323	unsigned char* out = start;
				324	unsigned char* in = start;
				325
				326	unsigned char code = 0;
				327
				328	while (in < end) {
				329	if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
				330	break;
				331	}
				332
				333	if (*in == '#' && in + 2 < end) {
				334	in++;
				335	switch (*in) {
				336	case '0':
				337	case '1':
				338	case '2':
				339	case '3':
				340	case '4':
				341	case '5':
				342	case '6':
				343	case '7':
				344	case '8':
				345	case '9':
				346	code = (*in - '0') << 4;
				347	break;
				348
				349	case 'a':
				350	case 'b':
				351	case 'c':
				352	case 'd':
				353	case 'e':
				354	case 'f':
				355	code = (*in - 'a' + 10) << 4;
				356	break;
				357
				358	case 'A':
				359	case 'B':
				360	case 'C':
				361	case 'D':
				362	case 'E':
				363	case 'F':
				364	code = (*in - 'A' + 10) << 4;
				365	break;
				366
				367	// TODO(edisonn): spec does not say how to handle this error
				368	default:
				369	break;
				370	}
				371
				372	in++; // advance
				373
				374	switch (*in) {
				375	case '0':
				376	case '1':
				377	case '2':
				378	case '3':
				379	case '4':
				380	case '5':
				381	case '6':
				382	case '7':
				383	case '8':
				384	case '9':
				385	code += (*in - '0');
				386	break;
				387
				388	case 'a':
				389	case 'b':
				390	case 'c':
				391	case 'd':
				392	case 'e':
				393	case 'f':
				394	code += (*in - 'a' + 10);
				395	break;
				396
				397	case 'A':
				398	case 'B':
				399	case 'C':
				400	case 'D':
				401	case 'E':
				402	case 'F':
				403	code += (*in - 'A' + 10);
				404	break;
				405
				406	// TODO(edisonn): spec does not say how to handle this error
				407	default:
				408	break;
				409	}
				410
				411	*out = code;
				412	out++;
				413	in++;
				414	} else {
				415	out = in;
				416	out++;
				417	in++;
				418	}
				419	}
				420
				421	SkPdfObject::makeName(start, out, name);
				422	return in;
				423	}
				424
				425	// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
				426	// that makes for an interesting scenario, where the stream itself contains endstream, together
				427	// with a reference object with the length, but the real length object would be somewhere else
				428	// it could confuse the parser
				429	/*example:
				430
				431	7 0 obj
				432	<< /length 8 0 R>>
				433	stream
				434	...............
				435	endstream
				436	8 0 obj #we are in stream actually, not a real object
				437	<< 10 >> #we are in stream actually, not a real object
				438	endobj
				439	endstream
				440	8 0 obj #real obj
				441	<< 100 >> #real obj
				442	endobj
				443	and it could get worse, with multiple object like this
				444	*/
				445
				446	// right now implement the silly algorithm that assumes endstream is finishing the stream
				447
				448
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	449	static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	450	start = skipPdfWhiteSpaces(start, end);
				451	if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
				452	// no stream. return.
				453	return start;
				454	}
				455
				456	start += 6; // strlen("stream")
				457	if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
				458	start += 2;
				459	} else if (start[0] == kLF_PdfWhiteSpace) {
				460	start += 1;
				461	}
				462
				463	SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
				464	// TODO(edisonn): load Length
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	465	int64_t length = -1;
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	466
				467	// TODO(edisonn): very basic implementation
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	468	if (stream->has_Length() && stream->Length(doc) > 0) {
				469	length = stream->Length(doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	470	}
				471
				472	// TODO(edisonn): laod external streams
				473	// TODO(edisonn): look at the last filter, to determione how to deal with possible issue
				474
				475	if (length < 0) {
				476	// scan the buffer, until we find first endstream
				477	// TODO(edisonn): all buffers must have a 0 at the end now,
				478	// TODO(edisonn): hack (mark end of content with 0)
				479	unsigned char lastCh = *end;
				480	*end = '\0';
				481	//SkASSERT(*end == '\0');
				482	unsigned char* endstream = (unsigned char)strstr((const char)start, "endstream");
				483	*end = lastCh;
				484
				485	if (endstream) {
				486	length = endstream - start;
				487	if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
				488	if (*(endstream-1) == kCR_PdfWhiteSpace) length--;
				489	}
				490	}
				491	if (length >= 0) {
				492	unsigned char* endstream = start + length;
				493
				494	if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
				495	endstream += 2;
				496	} else if (endstream[0] == kLF_PdfWhiteSpace) {
				497	endstream += 1;
				498	}
				499
				500	// TODO(edisonn): verify the next bytes are "endstream"
				501
				502	endstream += strlen("endstream");
				503	// TODO(edisonn): Assert? report error/warning?
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	504	dict->addStream(start, (size_t)length);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	505	return endstream;
				506	}
				507	return start;
				508	}
				509
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	510	static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	511	SkPdfObject::makeEmptyDictionary(dict);
				512
				513	start = skipPdfWhiteSpaces(start, end);
				514
				515	while (start < end && *start == kNamed_PdfDelimiter) {
				516	SkPdfObject key;
				517	*start = '\0';
				518	start++;
				519	start = readName(start, end, &key);
				520	start = skipPdfWhiteSpaces(start, end);
				521
				522	if (start < end) {
				523	SkPdfObject* value = allocator->allocObject();
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	524	start = nextObject(start, end, value, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	525
				526	start = skipPdfWhiteSpaces(start, end);
				527
				528	if (start < end) {
				529	// seems we have an indirect reference
				530	if (isPdfDigit(*start)) {
				531	SkPdfObject generation;
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	532	start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	533
				534	SkPdfObject keywordR;
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	535	start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	536
				537	if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
				538	int64_t id = value->intValue();
				539	value->reset();
edisonn@google.com	a3356fc	2013-07-10 18:20:06 +0000	[diff] [blame]	540	SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	541	dict->set(&key, value);
				542	} else {
				543	// error, ignore
				544	dict->set(&key, value);
				545	}
				546	} else {
				547	// next elem is not a digit, but it might not be / either!
				548	dict->set(&key, value);
				549	}
				550	} else {
				551	// /key >>
				552	dict->set(&key, value);
				553	return end;
				554	}
				555	start = skipPdfWhiteSpaces(start, end);
				556	} else {
				557	dict->set(&key, &SkPdfObject::kNull);
				558	return end;
				559	}
				560	}
				561
				562	// TODO(edisonn): options to ignore these errors
				563
				564	// now we should expect >>
				565	start = skipPdfWhiteSpaces(start, end);
				566	start = endOfPdfToken(start, end); // >
				567	start = endOfPdfToken(start, end); // >
				568
				569	// TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...
				570	// or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?
				571
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	572	start = readStream(start, end, dict, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	573
				574	return start;
				575	}
				576
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	577	unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	578	unsigned char* current;
				579
				580	// skip white spaces
				581	start = skipPdfWhiteSpaces(start, end);
				582
				583	current = endOfPdfToken(start, end);
				584
				585	// no token, len would be 0
				586	if (current == start) {
				587	return NULL;
				588	}
				589
				590	int tokenLen = current - start;
				591
				592	if (tokenLen == 1) {
				593	// start array
				594	switch (*start) {
				595	case kOpenedSquareBracket_PdfDelimiter:
				596	*start = '\0';
				597	SkPdfObject::makeEmptyArray(token);
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	598	return readArray(current, end, token, allocator, doc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	599
				600	case kOpenedRoundBracket_PdfDelimiter:
				601	*start = '\0';
				602	return readString(start, end, token);
				603
				604	case kOpenedInequityBracket_PdfDelimiter:
				605	*start = '\0';
				606	if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
				607	// TODO(edisonn): pass here the length somehow?
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	608	return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	609	} else {
				610	return readHexString(start + 1, end, token); // skip <
				611	}
				612
				613	case kNamed_PdfDelimiter:
				614	*start = '\0';
				615	return readName(start + 1, end, token);
				616
				617	// TODO(edisonn): what to do curly brackets? read spec!
				618	case kOpenedCurlyBracket_PdfDelimiter:
				619	default:
				620	break;
				621	}
				622
				623	SkASSERT(!isPdfWhiteSpace(*start));
				624	if (isPdfDelimiter(*start)) {
				625	// TODO(edisonn): how stream ] } > ) will be handled?
				626	// for now ignore, and it will become a keyword to be ignored
				627	}
				628	}
				629
				630	if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
				631	SkPdfObject::makeNull(token);
				632	return current;
				633	}
				634
				635	if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
				636	SkPdfObject::makeBoolean(true, token);
				637	return current;
				638	}
				639
				640	if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
				641	SkPdfObject::makeBoolean(false, token);
				642	return current;
				643	}
				644
				645	if (isPdfNumeric(*start)) {
				646	SkPdfObject::makeNumeric(start, current, token);
				647	} else {
				648	SkPdfObject::makeKeyword(start, current, token);
				649	}
				650	return current;
				651	}
				652
				653	SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	654	fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	655	return new SkPdfObject[BUFFER_SIZE];
				656	}
				657
				658	SkPdfAllocator::~SkPdfAllocator() {
				659	for (int i = 0 ; i < fHandles.count(); i++) {
				660	free(fHandles[i]);
				661	}
				662	for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	663	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				664	fHistory[i][j].reset();
				665	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	666	delete[] fHistory[i];
				667	}
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	668	for (int j = 0 ; j < BUFFER_SIZE; j++) {
				669	fCurrent[j].reset();
				670	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	671	delete[] fCurrent;
				672	}
				673
				674	SkPdfObject* SkPdfAllocator::allocObject() {
				675	if (fCurrentUsed >= BUFFER_SIZE) {
				676	fHistory.push(fCurrent);
				677	fCurrent = allocBlock();
				678	fCurrentUsed = 0;
edisonn@google.com	a5aaa79	2013-07-11 12:27:21 +0000	[diff] [blame]	679	fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	680	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	681	fCurrentUsed++;
				682	return &fCurrent[fCurrentUsed - 1];
				683	}
				684
				685	// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	686	SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	687	unsigned char* buffer = NULL;
				688	size_t len = 0;
				689	objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	690	// TODO(edisonn): hack, find end of object
				691	char* endobj = strstr((char*)buffer, "endobj");
				692	if (endobj) {
				693	len = endobj - (char*)buffer + strlen("endobj");
				694	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	695	fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
				696	fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	697	memcpy(fUncompressedStream, buffer, len);
				698	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	699
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	700	SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	701	// TODO(edisonn): hack, find end of object
				702	char* endobj = strstr((char*)buffer, "endobj");
				703	if (endobj) {
				704	len = endobj - (char*)buffer + strlen("endobj");
				705	}
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	706	fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
				707	fUncompressedStreamEnd = fUncompressedStream + len;
				708	memcpy(fUncompressedStream, buffer, len);
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	709	}
				710
				711	SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	712	}
				713
				714	bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
				715	token->fKeyword = NULL;
				716	token->fObject = NULL;
				717
				718	fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
				719	if (fUncompressedStream >= fUncompressedStreamEnd) {
				720	return false;
				721	}
				722
				723	SkPdfObject obj;
edisonn@google.com	951d653	2013-07-10 23:17:31 +0000	[diff] [blame]	724	fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	725
				726	// If it is a keyword, we will only get the pointer of the string
				727	if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
				728	token->fKeyword = obj.c_str();
				729	token->fKeywordLength = obj.len();
				730	token->fType = kKeyword_TokenType;
				731	} else {
				732	SkPdfObject* pobj = fAllocator->allocObject();
				733	*pobj = obj;
				734	token->fObject = pobj;
				735	token->fType = kObject_TokenType;
				736	}
				737
				738	#ifdef PDF_TRACE
				739	static int read_op = 0;
				740	read_op++;
edisonn@google.com	222382b	2013-07-10 22:33:10 +0000	[diff] [blame]	741	if (548 == read_op) {
edisonn@google.com	571c70b	2013-07-10 17:09:50 +0000	[diff] [blame]	742	printf("break;\n");
				743	}
				744	printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				745	#endif
				746
				747	return true;
				748	}
				749
				750	void SkPdfNativeTokenizer::PutBack(PdfToken token) {
				751	SkASSERT(!fHasPutBack);
				752	fHasPutBack = true;
				753	fPutBack = token;
				754	#ifdef PDF_TRACE
				755	printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
				756	#endif
				757	}
				758
				759	bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
				760	if (fHasPutBack) {
				761	*token = fPutBack;
				762	fHasPutBack = false;
				763	#ifdef PDF_TRACE
				764	printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
				765	#endif
				766	return true;
				767	}
				768
				769	if (fEmpty) {
				770	#ifdef PDF_TRACE
				771	printf("EMPTY TOKENIZER\n");
				772	#endif
				773	return false;
				774	}
				775
				776	return readTokenCore(token);
edisonn@google.com	3aac1f9	2013-07-02 22:42:53 +0000	[diff] [blame]	777	}