blob: 4e0e4c404ac3ffb17517c2ca0605dc5099bb7545 [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
8#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +00009#include "SkPdfNativeObject.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +000010#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000011
edisonn@google.com33f11b62013-08-14 21:35:27 +000012// TODO(edisonn): mac builder does not find the header ... but from headers is ok
13//#include "SkPdfStreamCommonDictionary_autogen.h"
14//#include "SkPdfImageDictionary_autogen.h"
15#include "SkPdfHeaders_autogen.h"
16
edisonn@google.com78b38b12013-07-15 18:20:58 +000017
18// TODO(edisonn): perf!!!
19// there could be 0s between start and end! but not in the needle.
20static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
21 int needleLen = strlen(needle);
22 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26
27 hayStart++;
28
29 while (hayStart < hayEnd) {
30 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
31 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
32 strncmp(hayStart, needle, needleLen) == 0) {
33 return hayStart;
34 }
35 hayStart++;
36 }
37 return NULL;
38}
39
#ifdef PDF_TRACE_TOKENIZER
// Starts a new trace line: prints a running event counter and the element type, followed by
// one space per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
#if 0
    // Handy spot for a breakpoint on one specific trace event.
    if (478613 == id) {
        printf("break;\n");
    }
#endif
    // Fixed-width columns keep the traced text aligned.
    printf("\n%10i %15s: ", id, type);
    int remaining = level;
    while (remaining > 0) {
        printf(" ");
        --remaining;
    }
}

// Echoes one byte that belongs to white space or to a comment.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Echoes one byte that belongs to a token.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Prints the bytes of [cursor, stop) followed by a new line.
static void tracePrintBytes(const unsigned char* cursor, const unsigned char* stop) {
    for (; cursor < stop; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints a decoded name.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    tracePrintBytes(start, end);
}

// Prints a decoded literal string.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    tracePrintBytes(start, end);
}

// Prints a decoded hex string.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    tracePrintBytes(start, end);
}

#else
// Tracing is disabled: every trace call expands to nothing.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
96
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000097const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 TRACE_INDENT(level, "White Space");
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000099 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000100 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000101 if (*start == kComment_PdfDelimiter) {
102 // skip the comment until end of line
103 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000104 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000105 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000106 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 }
108 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110 start++;
111 }
112 }
113 return start;
114}
115
116// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000117const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000118 //int opened brackets
119 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121
122 SkASSERT(!isPdfWhiteSpace(*start));
123
124 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000125 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126 start++;
127 return start;
128 }
129
130 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000131 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132 start++;
133 }
134 return start;
135}
136
edisonn@google.com571c70b2013-07-10 17:09:50 +0000137// last elem has to be ]
edisonn@google.combca421b2013-09-05 20:00:21 +0000138static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* array, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM) {
139 SkPdfNativeObject::makeEmptyArray(array PUT_TRACK_STREAM(start, start));
140
edisonn@google.com1f080162013-07-23 21:05:49 +0000141 if (allocator == NULL) {
142 // TODO(edisonn): report/warning error
143 return end;
144 }
145
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000146 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000147 while (start < end) {
148 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000149 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000150
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000151 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000152
153 if (endOfToken == start) {
154 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
155 return start;
156 }
157
158 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
159 return endOfToken;
160 }
161
edisonn@google.com3aa35552013-08-14 18:26:20 +0000162 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000163 start = nextObject(level + 1, start, end, newObj, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000164 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
165 // we are sure they are not references!
166 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000167 SkPdfNativeObject* gen = array->removeLastInArray();
168 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.combca421b2013-09-05 20:00:21 +0000169
170 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj));
171
edisonn@google.com571c70b2013-07-10 17:09:50 +0000172 }
173 array->appendInArray(newObj);
174 }
175 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000176 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
177 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000178 return start;
179}
180
181// When we read strings we will rewrite the string so we will reuse the memory
182// when we start to read the string, we already consumed the opened bracket
edisonn@google.com571c70b2013-07-10 17:09:50 +0000183
// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer, or can reuse the one we have
185
186static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
187 TRACE_INDENT(level, "String");
188 const unsigned char* in = start;
189 bool hasOut = (out != NULL);
190
191 int openRoundBrackets = 1;
192 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000193 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
194 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000195 if (openRoundBrackets == 0) {
196 in++; // consumed )
197 break;
198 }
199
edisonn@google.com571c70b2013-07-10 17:09:50 +0000200 if (*in == kEscape_PdfSpecial) {
201 if (in + 1 < end) {
202 switch (in[1]) {
203 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000204 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000205 out++;
206 in += 2;
207 break;
208
209 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000210 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000211 out++;
212 in += 2;
213 break;
214
215 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000216 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000217 out++;
218 in += 2;
219 break;
220
221 case 'b':
222 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000223 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000224 out++;
225 in += 2;
226 break;
227
228 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000229 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000230 out++;
231 in += 2;
232 break;
233
234 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000235 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000236 out++;
237 in += 2;
238 break;
239
240 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000241 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000242 out++;
243 in += 2;
244 break;
245
246 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000247 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000248 out++;
249 in += 2;
250 break;
251
252 case '0':
253 case '1':
254 case '2':
255 case '3':
256 case '4':
257 case '5':
258 case '6':
259 case '7': {
260 //read octals
261 in++; // consume backslash
262
263 int code = 0;
264 int i = 0;
265 while (in < end && *in >= '0' && *in < '8') {
266 code = (code << 3) + ((*in) - '0'); // code * 8 + d
267 i++;
268 in++;
269 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000270 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000271 out++;
272 i = 0;
273 }
274 }
275 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000276 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000277 out++;
278 }
279 }
280 break;
281
282 default:
283 // Per spec, backslash is ignored is escaped ch is unknown
284 in++;
285 break;
286 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000287 } else {
288 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000289 }
290 } else {
291 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
292 // we could have one look that first just inc current, and when we find the backslash
293 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000294 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000295 in++;
296 out++;
297 }
298 }
299
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000300 if (hasOut) {
301 return in; // consumed already ) at the end of the string
302 } else {
303 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
304 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000305}
306
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000307static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
308 return readString(level, start, end, NULL) - start;
309}
310
edisonn@google.combca421b2013-09-05 20:00:21 +0000311static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator GET_TRACK_STREAM) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000312 if (!allocator) {
313 return end;
314 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000315 int outLength = readStringLength(level, start, end);
316 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
317 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000318 const unsigned char* now = readString(level, start, end, out);
319 SkPdfNativeObject::makeString(out, out + outLength, str PUT_TRACK_STREAM(start, now));
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000320 TRACE_STRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000321 return now; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000322}
323
324static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
325 TRACE_INDENT(level, "HexString");
326 bool hasOut = (out != NULL);
327 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000328
329 unsigned char code = 0;
330
331 while (in < end) {
332 while (in < end && isPdfWhiteSpace(*in)) {
333 in++;
334 }
335
336 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000337 //*in = '\0';
338 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000339 // normal exit
340 break;
341 }
342
343 if (in >= end) {
344 // end too soon
345 break;
346 }
347
348 switch (*in) {
349 case '0':
350 case '1':
351 case '2':
352 case '3':
353 case '4':
354 case '5':
355 case '6':
356 case '7':
357 case '8':
358 case '9':
359 code = (*in - '0') << 4;
360 break;
361
362 case 'a':
363 case 'b':
364 case 'c':
365 case 'd':
366 case 'e':
367 case 'f':
368 code = (*in - 'a' + 10) << 4;
369 break;
370
371 case 'A':
372 case 'B':
373 case 'C':
374 case 'D':
375 case 'E':
376 case 'F':
377 code = (*in - 'A' + 10) << 4;
378 break;
379
380 // TODO(edisonn): spec does not say how to handle this error
381 default:
382 break;
383 }
384
385 in++; // advance
386
387 while (in < end && isPdfWhiteSpace(*in)) {
388 in++;
389 }
390
391 // TODO(edisonn): report error
392 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000393 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000394 out++;
395 break;
396 }
397
398 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000399 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000400 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000401 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000402 break;
403 }
404
405 switch (*in) {
406 case '0':
407 case '1':
408 case '2':
409 case '3':
410 case '4':
411 case '5':
412 case '6':
413 case '7':
414 case '8':
415 case '9':
416 code += (*in - '0');
417 break;
418
419 case 'a':
420 case 'b':
421 case 'c':
422 case 'd':
423 case 'e':
424 case 'f':
425 code += (*in - 'a' + 10);
426 break;
427
428 case 'A':
429 case 'B':
430 case 'C':
431 case 'D':
432 case 'E':
433 case 'F':
434 code += (*in - 'A' + 10);
435 break;
436
437 // TODO(edisonn): spec does not say how to handle this error
438 default:
439 break;
440 }
441
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000442 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000443 out++;
444 in++;
445 }
446
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000447 if (hasOut) {
448 return in; // consumed already > at the end of the string
449 } else {
450 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000451 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000452}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000453
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000454static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
455 return readHexString(level, start, end, NULL) - start;
456}
457
edisonn@google.combca421b2013-09-05 20:00:21 +0000458static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator GET_TRACK_STREAM) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000459 if (!allocator) {
460 return end;
461 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000462 int outLength = readHexStringLength(level, start, end);
463 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
464 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000465 const unsigned char* now = readHexString(level, start, end, out);
466 SkPdfNativeObject::makeHexString(out, out + outLength, str PUT_TRACK_STREAM(start, now));
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000467 TRACE_HEXSTRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000468 return now; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000469}
470
471// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000472static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
473 TRACE_INDENT(level, "Name");
474 bool hasOut = (out != NULL);
475 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000476
477 unsigned char code = 0;
478
479 while (in < end) {
480 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
481 break;
482 }
483
484 if (*in == '#' && in + 2 < end) {
485 in++;
486 switch (*in) {
487 case '0':
488 case '1':
489 case '2':
490 case '3':
491 case '4':
492 case '5':
493 case '6':
494 case '7':
495 case '8':
496 case '9':
497 code = (*in - '0') << 4;
498 break;
499
500 case 'a':
501 case 'b':
502 case 'c':
503 case 'd':
504 case 'e':
505 case 'f':
506 code = (*in - 'a' + 10) << 4;
507 break;
508
509 case 'A':
510 case 'B':
511 case 'C':
512 case 'D':
513 case 'E':
514 case 'F':
515 code = (*in - 'A' + 10) << 4;
516 break;
517
518 // TODO(edisonn): spec does not say how to handle this error
519 default:
520 break;
521 }
522
523 in++; // advance
524
525 switch (*in) {
526 case '0':
527 case '1':
528 case '2':
529 case '3':
530 case '4':
531 case '5':
532 case '6':
533 case '7':
534 case '8':
535 case '9':
536 code += (*in - '0');
537 break;
538
539 case 'a':
540 case 'b':
541 case 'c':
542 case 'd':
543 case 'e':
544 case 'f':
545 code += (*in - 'a' + 10);
546 break;
547
548 case 'A':
549 case 'B':
550 case 'C':
551 case 'D':
552 case 'E':
553 case 'F':
554 code += (*in - 'A' + 10);
555 break;
556
557 // TODO(edisonn): spec does not say how to handle this error
558 default:
559 break;
560 }
561
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000562 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000563 out++;
564 in++;
565 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000566 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000567 out++;
568 in++;
569 }
570 }
571
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000572 if (hasOut) {
573 return in;
574 } else {
575 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
576 }
577}
578
579static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
580 return readName(level, start, end, NULL) - start;
581}
582
edisonn@google.combca421b2013-09-05 20:00:21 +0000583static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* name, SkPdfAllocator* allocator GET_TRACK_STREAM) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000584 if (!allocator) {
585 return end;
586 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000587 int outLength = readNameLength(level, start, end);
588 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
589 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000590 const unsigned char* now = readName(level, start, end, out);
591 SkPdfNativeObject::makeName(out, out + outLength, name PUT_TRACK_STREAM(start, now));
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000592 TRACE_NAME(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000593 return now;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000594}
595
// TODO(edisonn): the pdf spec allows Length to be an indirect object that is defined after the
// stream. That makes for an interesting scenario, where the stream data itself contains
// "endstream" together with an object that looks like the length, while the real length object
// is somewhere else. This could confuse the parser.
600/*example:
601
6027 0 obj
603<< /length 8 0 R>>
604stream
605...............
606endstream
6078 0 obj #we are in stream actually, not a real object
608<< 10 >> #we are in stream actually, not a real object
609endobj
610endstream
6118 0 obj #real obj
612<< 100 >> #real obj
613endobj
614and it could get worse, with multiple object like this
615*/
616
617// right now implement the silly algorithm that assumes endstream is finishing the stream
618
619
edisonn@google.com3aa35552013-08-14 18:26:20 +0000620static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000621 TRACE_INDENT(level, "Stream");
622 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000623 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
624 // no stream. return.
625 return start;
626 }
627
628 start += 6; // strlen("stream")
629 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
630 start += 2;
631 } else if (start[0] == kLF_PdfWhiteSpace) {
632 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000633 } else if (isPdfWhiteSpace(start[0])) {
634 start += 1;
635 } else {
636 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
637 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000638 }
639
640 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
641 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000642 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000643
644 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000645 if (stream->has_Length() && stream->Length(doc) > 0) {
646 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000647 }
648
649 // TODO(edisonn): laod external streams
650 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
651
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000652
653 if (length >= 0) {
654 const unsigned char* endstream = start + length;
655
656 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
657 endstream += 2;
658 } else if (endstream[0] == kLF_PdfWhiteSpace) {
659 endstream += 1;
660 }
661
662 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
663 length = -1;
664 }
665 }
666
edisonn@google.com571c70b2013-07-10 17:09:50 +0000667 if (length < 0) {
668 // scan the buffer, until we find first endstream
669 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000670 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000671
672 if (endstream) {
673 length = endstream - start;
674 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000675 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000676 }
677 }
678 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000679 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000680
681 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
682 endstream += 2;
683 } else if (endstream[0] == kLF_PdfWhiteSpace) {
684 endstream += 1;
685 }
686
687 // TODO(edisonn): verify the next bytes are "endstream"
688
689 endstream += strlen("endstream");
690 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000691 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000692 return endstream;
693 }
694 return start;
695}
696
edisonn@google.com3aa35552013-08-14 18:26:20 +0000697static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000698 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000699 // We already processed ID keyword, and we should be positioned immediately after it
700
701 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
702 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
703 start += 2;
704 } else if (start[0] == kLF_PdfWhiteSpace) {
705 start += 1;
706 } else if (isPdfWhiteSpace(start[0])) {
707 start += 1;
708 } else {
709 SkASSERT(isPdfDelimiter(start[0]));
710 // TODO(edisonn): warning?
711 }
712
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000713 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
714 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000715
716 if (endstream) {
717 int length = endstream - start;
718 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
719 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
720 inlineImage->addStream(start, (size_t)length);
721 } else {
722 // TODO(edisonn): report error in inline image stream (ID-EI) section
723 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
724 return end;
725 }
726 return endEI;
727}
728
edisonn@google.combca421b2013-09-05 20:00:21 +0000729static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000730 if (allocator == NULL) {
731 // TODO(edisonn): report/warning error
732 return end;
733 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000734 TRACE_INDENT(level, "Dictionary");
edisonn@google.combca421b2013-09-05 20:00:21 +0000735 SkPdfNativeObject::makeEmptyDictionary(dict PUT_TRACK_STREAM(start, start));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000736
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000737 start = skipPdfWhiteSpaces(level, start, end);
738 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000739
740 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000741 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000742 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743 start++;
edisonn@google.combca421b2013-09-05 20:00:21 +0000744 start = readName(level + 1, start, end, &key, &tmpStorage PUT_TRACK_STREAM_ARGS);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000745 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000746
747 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000748 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000749 start = nextObject(level + 1, start, end, value, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000750
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000751 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000752
753 if (start < end) {
754 // seems we have an indirect reference
755 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000756 SkPdfNativeObject generation;
edisonn@google.combca421b2013-09-05 20:00:21 +0000757 start = nextObject(level + 1, start, end, &generation, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000758
edisonn@google.com3aa35552013-08-14 18:26:20 +0000759 SkPdfNativeObject keywordR;
edisonn@google.combca421b2013-09-05 20:00:21 +0000760 start = nextObject(level + 1, start, end, &keywordR, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000761
762 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
763 int64_t id = value->intValue();
edisonn@google.combca421b2013-09-05 20:00:21 +0000764 SkPdfNativeObject::resetAndMakeReference((unsigned int)id, (unsigned int)generation.intValue(), value PUT_TRACK_PARAMETERS_OBJ2(value, &generation));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000765 dict->set(&key, value);
766 } else {
767 // error, ignore
768 dict->set(&key, value);
769 }
770 } else {
771 // next elem is not a digit, but it might not be / either!
772 dict->set(&key, value);
773 }
774 } else {
775 // /key >>
776 dict->set(&key, value);
777 return end;
778 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000779 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000780 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000781 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000782 return end;
783 }
784 }
785
786 // TODO(edisonn): options to ignore these errors
787
788 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000789 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000790 if (*start != kClosedInequityBracket_PdfDelimiter) {
791 // TODO(edisonn): report/warning
792 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000793 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000794 start++; // skip >
795 if (*start != kClosedInequityBracket_PdfDelimiter) {
796 // TODO(edisonn): report/warning
797 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000798 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000799 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800
edisonn@google.combca421b2013-09-05 20:00:21 +0000801 STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
802
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000803 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000804
805 return start;
806}
807
edisonn@google.combca421b2013-09-05 20:00:21 +0000808const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000809 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000810
811 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000812 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000813
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000814 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000815
816 // no token, len would be 0
817 if (current == start) {
818 return NULL;
819 }
820
821 int tokenLen = current - start;
822
823 if (tokenLen == 1) {
824 // start array
825 switch (*start) {
826 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000827 //*start = '\0';
edisonn@google.combca421b2013-09-05 20:00:21 +0000828 return readArray(level + 1, current, end, token, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000829
830 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000831 //*start = '\0';
edisonn@google.combca421b2013-09-05 20:00:21 +0000832 return readString(level, start + 1, end, token, allocator PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000833
834 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000835 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000836 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000837 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000838 // TODO(edisonn): pass here the length somehow?
edisonn@google.combca421b2013-09-05 20:00:21 +0000839 return readDictionary(level + 1, start + 2, end, token, allocator, doc PUT_TRACK_STREAM_ARGS); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000840 } else {
edisonn@google.combca421b2013-09-05 20:00:21 +0000841 return readHexString(level, start + 1, end, token, allocator PUT_TRACK_STREAM_ARGS); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000842 }
843
844 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000845 //*start = '\0';
edisonn@google.combca421b2013-09-05 20:00:21 +0000846 return readName(level, start + 1, end, token, allocator PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000847
848 // TODO(edisonn): what to do curly brackets? read spec!
849 case kOpenedCurlyBracket_PdfDelimiter:
850 default:
851 break;
852 }
853
854 SkASSERT(!isPdfWhiteSpace(*start));
855 if (isPdfDelimiter(*start)) {
856 // TODO(edisonn): how stream ] } > ) will be handled?
857 // for now ignore, and it will become a keyword to be ignored
858 }
859 }
860
861 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.combca421b2013-09-05 20:00:21 +0000862 SkPdfNativeObject::makeNull(token PUT_TRACK_STREAM(start, start + 4));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000863 return current;
864 }
865
866 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.combca421b2013-09-05 20:00:21 +0000867 SkPdfNativeObject::makeBoolean(true, token PUT_TRACK_STREAM(start, start + 4));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000868 return current;
869 }
870
edisonn@google.comf111a4b2013-07-31 18:22:36 +0000871 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.combca421b2013-09-05 20:00:21 +0000872 SkPdfNativeObject::makeBoolean(false, token PUT_TRACK_STREAM(start, start + 5));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000873 return current;
874 }
875
876 if (isPdfNumeric(*start)) {
edisonn@google.combca421b2013-09-05 20:00:21 +0000877 SkPdfNativeObject::makeNumeric(start, current, token PUT_TRACK_STREAM(start, current));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000878 } else {
edisonn@google.combca421b2013-09-05 20:00:21 +0000879 SkPdfNativeObject::makeKeyword(start, current, token PUT_TRACK_STREAM(start, current));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000880 }
881 return current;
882}
883
edisonn@google.com3aa35552013-08-14 18:26:20 +0000884SkPdfNativeObject* SkPdfAllocator::allocBlock() {
885 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
886 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000887}
888
889SkPdfAllocator::~SkPdfAllocator() {
890 for (int i = 0 ; i < fHandles.count(); i++) {
891 free(fHandles[i]);
892 }
893 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000894 for (int j = 0 ; j < BUFFER_SIZE; j++) {
895 fHistory[i][j].reset();
896 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000897 delete[] fHistory[i];
898 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000899 for (int j = 0 ; j < BUFFER_SIZE; j++) {
900 fCurrent[j].reset();
901 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000902 delete[] fCurrent;
903}
904
edisonn@google.com3aa35552013-08-14 18:26:20 +0000905SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000906 if (fCurrentUsed >= BUFFER_SIZE) {
907 fHistory.push(fCurrent);
908 fCurrent = allocBlock();
909 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000910 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000911 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000912 fCurrentUsed++;
913 return &fCurrent[fCurrentUsed - 1];
914}
915
// TODO(edisonn): perf: do not copy the buffers; use them in place and cache the result, so there is no need for a second pass
edisonn@google.com33f11b62013-08-14 21:35:27 +0000917SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000918 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000919 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000920 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000921 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000922 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000923 if (endobj) {
924 len = endobj - (char*)buffer + strlen("endobj");
925 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000926 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000927 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000928}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000929
edisonn@google.com33f11b62013-08-14 21:35:27 +0000930SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000931 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000932 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000933 if (endobj) {
934 len = endobj - (char*)buffer + strlen("endobj");
935 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000936 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000937 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000938}
939
940SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000941}
942
943bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000944 SkPdfNativeObject obj;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000945#ifdef PDF_TRACE_READ_TOKEN
946 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000947#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000948 token->fKeyword = NULL;
949 token->fObject = NULL;
950
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000951 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000952 if (fUncompressedStream >= fUncompressedStreamEnd) {
953 return false;
954 }
955
edisonn@google.combca421b2013-09-05 20:00:21 +0000956 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000957
958 // If it is a keyword, we will only get the pointer of the string
edisonn@google.com3aa35552013-08-14 18:26:20 +0000959 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000960 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000961 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000962 token->fType = kKeyword_TokenType;
963 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000964 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000965 *pobj = obj;
966 token->fObject = pobj;
967 token->fType = kObject_TokenType;
968 }
969
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000970#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000971 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000972#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000973 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000974 printf("break;\n");
975 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000976#endif
edisonn@google.come91260c2013-09-04 17:29:06 +0000977 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000978#endif
979
980 return true;
981}
982
983void SkPdfNativeTokenizer::PutBack(PdfToken token) {
984 SkASSERT(!fHasPutBack);
985 fHasPutBack = true;
986 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000987#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.come91260c2013-09-04 17:29:06 +0000988 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000989#endif
990}
991
992bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
993 if (fHasPutBack) {
994 *token = fPutBack;
995 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000996#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.come91260c2013-09-04 17:29:06 +0000997 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000998#endif
999 return true;
1000 }
1001
1002 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001003#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +00001004 printf("EMPTY TOKENIZER\n");
1005#endif
1006 return false;
1007 }
1008
1009 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001010}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001011
1012#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1013
1014// keys
1015DECLARE_PDF_NAME(BitsPerComponent);
1016DECLARE_PDF_NAME(ColorSpace);
1017DECLARE_PDF_NAME(Decode);
1018DECLARE_PDF_NAME(DecodeParms);
1019DECLARE_PDF_NAME(Filter);
1020DECLARE_PDF_NAME(Height);
1021DECLARE_PDF_NAME(ImageMask);
1022DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
1023DECLARE_PDF_NAME(Interpolate);
1024DECLARE_PDF_NAME(Width);
1025
1026// values
1027DECLARE_PDF_NAME(DeviceGray);
1028DECLARE_PDF_NAME(DeviceRGB);
1029DECLARE_PDF_NAME(DeviceCMYK);
1030DECLARE_PDF_NAME(Indexed);
1031DECLARE_PDF_NAME(ASCIIHexDecode);
1032DECLARE_PDF_NAME(ASCII85Decode);
1033DECLARE_PDF_NAME(LZWDecode);
1034DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1035DECLARE_PDF_NAME(RunLengthDecode);
1036DECLARE_PDF_NAME(CCITTFaxDecode);
1037DECLARE_PDF_NAME(DCTDecode);
1038
1039#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1040
1041
edisonn@google.com3aa35552013-08-14 18:26:20 +00001042static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001043 if (!key || !key->isName()) {
1044 return key;
1045 }
1046
1047 // TODO(edisonn): use autogenerated code!
1048 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1049 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1050 HANDLE_NAME_ABBR(key, Decode, D);
1051 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1052 HANDLE_NAME_ABBR(key, Filter, F);
1053 HANDLE_NAME_ABBR(key, Height, H);
1054 HANDLE_NAME_ABBR(key, ImageMask, IM);
1055// HANDLE_NAME_ABBR(key, Intent, );
1056 HANDLE_NAME_ABBR(key, Interpolate, I);
1057 HANDLE_NAME_ABBR(key, Width, W);
1058
1059 return key;
1060}
1061
edisonn@google.com3aa35552013-08-14 18:26:20 +00001062static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001063 if (!value || !value->isName()) {
1064 return value;
1065 }
1066
1067 // TODO(edisonn): use autogenerated code!
1068 HANDLE_NAME_ABBR(value, DeviceGray, G);
1069 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1070 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1071 HANDLE_NAME_ABBR(value, Indexed, I);
1072 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1073 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1074 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1075 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1076 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1077 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1078 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1079
1080 return value;
1081}
1082
1083SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1084 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001085 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001086 if (fUncompressedStream >= fUncompressedStreamEnd) {
1087 return NULL;
1088 }
1089
1090 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +00001091 SkPdfNativeObject::makeEmptyDictionary(inlineImage PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart, fUncompressedStream - fUncompressedStreamStart));
edisonn@google.com78b38b12013-07-15 18:20:58 +00001092
1093 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001094 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +00001095 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart));
edisonn@google.com78b38b12013-07-15 18:20:58 +00001096
edisonn@google.come878e722013-07-29 19:10:58 +00001097 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001098 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001099 return inlineImage;
1100 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001101 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +00001102 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart));
edisonn@google.com78b38b12013-07-15 18:20:58 +00001103 // TODO(edisonn): perf maybe we should not expand abreviation like this
1104 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1105 inlineImageValueAbbreviationExpand(obj));
1106 }
1107 }
1108 // TODO(edisonn): report end of data with inline image without an EI
1109 return inlineImage;
1110}