blob: babfedfb059173b9637d86894fbf7cd1f2bd8791 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com78b38b12013-07-15 18:20:58 +00007#include "SkPdfImageDictionary_autogen.h"
8
9// TODO(edisonn): perf!!!
10// there could be 0s between start and end! but not in the needle.
11static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
12 int needleLen = strlen(needle);
13 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
14 strncmp(hayStart, needle, needleLen) == 0) {
15 return hayStart;
16 }
17
18 hayStart++;
19
20 while (hayStart < hayEnd) {
21 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
22 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26 hayStart++;
27 }
28 return NULL;
29}
30
edisonn@google.com3aac1f92013-07-02 22:42:53 +000031
edisonn@google.coma3356fc2013-07-10 18:20:06 +000032static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000033 while (start < end && isPdfWhiteSpace(*start)) {
34 if (*start == kComment_PdfDelimiter) {
35 // skip the comment until end of line
36 while (start < end && !isPdfEOL(*start)) {
37 *start = '\0';
38 start++;
39 }
40 } else {
41 *start = '\0';
42 start++;
43 }
44 }
45 return start;
46}
47
48// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.coma3356fc2013-07-10 18:20:06 +000049static unsigned char* endOfPdfToken(unsigned char* start, unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000050 //int opened brackets
51 //TODO(edisonn): what out for special chars, like \n, \032
52
53 SkASSERT(!isPdfWhiteSpace(*start));
54
55 if (start < end && isPdfDelimiter(*start)) {
56 start++;
57 return start;
58 }
59
60 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
61 start++;
62 }
63 return start;
64}
65
edisonn@google.com571c70b2013-07-10 17:09:50 +000066// last elem has to be ]
edisonn@google.com951d6532013-07-10 23:17:31 +000067static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000068 while (start < end) {
69 // skip white spaces
70 start = skipPdfWhiteSpaces(start, end);
71
72 unsigned char* endOfToken = endOfPdfToken(start, end);
73
74 if (endOfToken == start) {
75 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
76 return start;
77 }
78
79 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
80 return endOfToken;
81 }
82
83 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com951d6532013-07-10 23:17:31 +000084 start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +000085 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
86 // we are sure they are not references!
87 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
88 SkPdfObject* gen = array->removeLastInArray();
89 SkPdfObject* id = array->removeLastInArray();
90 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +000091 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +000092 }
93 array->appendInArray(newObj);
94 }
edisonn@google.com78b38b12013-07-15 18:20:58 +000095 printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com571c70b2013-07-10 17:09:50 +000096 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +000097 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
98 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 return start;
100}
101
102// When we read strings we will rewrite the string so we will reuse the memory
103// when we start to read the string, we already consumed the opened bracket
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000104static unsigned char* readString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000105 unsigned char* out = start;
106 unsigned char* in = start;
107
108 int openRoundBrackets = 0;
109 while (in < end && (*in != kClosedRoundBracket_PdfDelimiter || openRoundBrackets > 0)) {
110 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
111 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
112 if (*in == kEscape_PdfSpecial) {
113 if (in + 1 < end) {
114 switch (in[1]) {
115 case 'n':
116 *out = kLF_PdfWhiteSpace;
117 out++;
118 in += 2;
119 break;
120
121 case 'r':
122 *out = kCR_PdfWhiteSpace;
123 out++;
124 in += 2;
125 break;
126
127 case 't':
128 *out = kHT_PdfWhiteSpace;
129 out++;
130 in += 2;
131 break;
132
133 case 'b':
134 // TODO(edisonn): any special meaning to backspace?
135 *out = kBackspace_PdfSpecial;
136 out++;
137 in += 2;
138 break;
139
140 case 'f':
141 *out = kFF_PdfWhiteSpace;
142 out++;
143 in += 2;
144 break;
145
146 case kOpenedRoundBracket_PdfDelimiter:
147 *out = kOpenedRoundBracket_PdfDelimiter;
148 out++;
149 in += 2;
150 break;
151
152 case kClosedRoundBracket_PdfDelimiter:
153 *out = kClosedRoundBracket_PdfDelimiter;
154 out++;
155 in += 2;
156 break;
157
158 case kEscape_PdfSpecial:
159 *out = kEscape_PdfSpecial;
160 out++;
161 in += 2;
162 break;
163
164 case '0':
165 case '1':
166 case '2':
167 case '3':
168 case '4':
169 case '5':
170 case '6':
171 case '7': {
172 //read octals
173 in++; // consume backslash
174
175 int code = 0;
176 int i = 0;
177 while (in < end && *in >= '0' && *in < '8') {
178 code = (code << 3) + ((*in) - '0'); // code * 8 + d
179 i++;
180 in++;
181 if (i == 3) {
182 *out = code & 0xff;
183 out++;
184 i = 0;
185 }
186 }
187 if (i > 0) {
188 *out = code & 0xff;
189 out++;
190 }
191 }
192 break;
193
194 default:
195 // Per spec, backslash is ignored is escaped ch is unknown
196 in++;
197 break;
198 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000199 } else {
200 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000201 }
202 } else {
203 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
204 // we could have one look that first just inc current, and when we find the backslash
205 // we go to this loop
206 *in = *out;
207 in++;
208 out++;
209 }
210 }
211
212
213 SkPdfObject::makeString(start, out, str);
214 return in + 1; // consume ) at the end of the string
215}
216
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000217static unsigned char* readHexString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000218 unsigned char* out = start;
219 unsigned char* in = start;
220
221 unsigned char code = 0;
222
223 while (in < end) {
224 while (in < end && isPdfWhiteSpace(*in)) {
225 in++;
226 }
227
228 if (*in == kClosedInequityBracket_PdfDelimiter) {
229 *in = '\0';
230 in++;
231 // normal exit
232 break;
233 }
234
235 if (in >= end) {
236 // end too soon
237 break;
238 }
239
240 switch (*in) {
241 case '0':
242 case '1':
243 case '2':
244 case '3':
245 case '4':
246 case '5':
247 case '6':
248 case '7':
249 case '8':
250 case '9':
251 code = (*in - '0') << 4;
252 break;
253
254 case 'a':
255 case 'b':
256 case 'c':
257 case 'd':
258 case 'e':
259 case 'f':
260 code = (*in - 'a' + 10) << 4;
261 break;
262
263 case 'A':
264 case 'B':
265 case 'C':
266 case 'D':
267 case 'E':
268 case 'F':
269 code = (*in - 'A' + 10) << 4;
270 break;
271
272 // TODO(edisonn): spec does not say how to handle this error
273 default:
274 break;
275 }
276
277 in++; // advance
278
279 while (in < end && isPdfWhiteSpace(*in)) {
280 in++;
281 }
282
283 // TODO(edisonn): report error
284 if (in >= end) {
285 *out = code;
286 out++;
287 break;
288 }
289
290 if (*in == kClosedInequityBracket_PdfDelimiter) {
291 *out = code;
292 out++;
293 break;
294 }
295
296 switch (*in) {
297 case '0':
298 case '1':
299 case '2':
300 case '3':
301 case '4':
302 case '5':
303 case '6':
304 case '7':
305 case '8':
306 case '9':
307 code += (*in - '0');
308 break;
309
310 case 'a':
311 case 'b':
312 case 'c':
313 case 'd':
314 case 'e':
315 case 'f':
316 code += (*in - 'a' + 10);
317 break;
318
319 case 'A':
320 case 'B':
321 case 'C':
322 case 'D':
323 case 'E':
324 case 'F':
325 code += (*in - 'A' + 10);
326 break;
327
328 // TODO(edisonn): spec does not say how to handle this error
329 default:
330 break;
331 }
332
333 *out = code;
334 out++;
335 in++;
336 }
337
338 if (out < in) {
339 *out = '\0';
340 }
341
342 SkPdfObject::makeHexString(start, out, str);
343 return in; // consume > at the end of the string
344}
345
346// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000347static unsigned char* readName(unsigned char* start, unsigned char* end, SkPdfObject* name) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000348 unsigned char* out = start;
349 unsigned char* in = start;
350
351 unsigned char code = 0;
352
353 while (in < end) {
354 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
355 break;
356 }
357
358 if (*in == '#' && in + 2 < end) {
359 in++;
360 switch (*in) {
361 case '0':
362 case '1':
363 case '2':
364 case '3':
365 case '4':
366 case '5':
367 case '6':
368 case '7':
369 case '8':
370 case '9':
371 code = (*in - '0') << 4;
372 break;
373
374 case 'a':
375 case 'b':
376 case 'c':
377 case 'd':
378 case 'e':
379 case 'f':
380 code = (*in - 'a' + 10) << 4;
381 break;
382
383 case 'A':
384 case 'B':
385 case 'C':
386 case 'D':
387 case 'E':
388 case 'F':
389 code = (*in - 'A' + 10) << 4;
390 break;
391
392 // TODO(edisonn): spec does not say how to handle this error
393 default:
394 break;
395 }
396
397 in++; // advance
398
399 switch (*in) {
400 case '0':
401 case '1':
402 case '2':
403 case '3':
404 case '4':
405 case '5':
406 case '6':
407 case '7':
408 case '8':
409 case '9':
410 code += (*in - '0');
411 break;
412
413 case 'a':
414 case 'b':
415 case 'c':
416 case 'd':
417 case 'e':
418 case 'f':
419 code += (*in - 'a' + 10);
420 break;
421
422 case 'A':
423 case 'B':
424 case 'C':
425 case 'D':
426 case 'E':
427 case 'F':
428 code += (*in - 'A' + 10);
429 break;
430
431 // TODO(edisonn): spec does not say how to handle this error
432 default:
433 break;
434 }
435
436 *out = code;
437 out++;
438 in++;
439 } else {
440 *out = *in;
441 out++;
442 in++;
443 }
444 }
445
446 SkPdfObject::makeName(start, out, name);
447 return in;
448}
449
// TODO(edisonn): the PDF spec lets Length be an indirect object that is defined after the stream.
// That makes for an interesting scenario, where the stream data itself contains "endstream"
// together with a fake object holding a length, while the real length object lives somewhere
// else. It could confuse the parser.
454/*example:
455
4567 0 obj
457<< /length 8 0 R>>
458stream
459...............
460endstream
4618 0 obj #we are in stream actually, not a real object
462<< 10 >> #we are in stream actually, not a real object
463endobj
464endstream
4658 0 obj #real obj
466<< 100 >> #real obj
467endobj
468and it could get worse, with multiple object like this
469*/
470
471// right now implement the silly algorithm that assumes endstream is finishing the stream
472
473
edisonn@google.com951d6532013-07-10 23:17:31 +0000474static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000475 start = skipPdfWhiteSpaces(start, end);
476 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
477 // no stream. return.
478 return start;
479 }
480
481 start += 6; // strlen("stream")
482 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
483 start += 2;
484 } else if (start[0] == kLF_PdfWhiteSpace) {
485 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000486 } else if (isPdfWhiteSpace(start[0])) {
487 start += 1;
488 } else {
489 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
490 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000491 }
492
493 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
494 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000495 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000496
497 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000498 if (stream->has_Length() && stream->Length(doc) > 0) {
499 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000500 }
501
502 // TODO(edisonn): laod external streams
503 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
504
505 if (length < 0) {
506 // scan the buffer, until we find first endstream
507 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com78b38b12013-07-15 18:20:58 +0000508 unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000509
510 if (endstream) {
511 length = endstream - start;
512 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000513 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000514 }
515 }
516 if (length >= 0) {
517 unsigned char* endstream = start + length;
518
519 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
520 endstream += 2;
521 } else if (endstream[0] == kLF_PdfWhiteSpace) {
522 endstream += 1;
523 }
524
525 // TODO(edisonn): verify the next bytes are "endstream"
526
527 endstream += strlen("endstream");
528 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000529 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000530 return endstream;
531 }
532 return start;
533}
534
edisonn@google.com78b38b12013-07-15 18:20:58 +0000535static unsigned char* readInlineImageStream(unsigned char* start, unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
536 // We already processed ID keyword, and we should be positioned immediately after it
537
538 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
539 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
540 start += 2;
541 } else if (start[0] == kLF_PdfWhiteSpace) {
542 start += 1;
543 } else if (isPdfWhiteSpace(start[0])) {
544 start += 1;
545 } else {
546 SkASSERT(isPdfDelimiter(start[0]));
547 // TODO(edisonn): warning?
548 }
549
550 unsigned char* endstream = (unsigned char*)strrstrk((char*)start, (char*)end, "EI");
551 unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
552
553 if (endstream) {
554 int length = endstream - start;
555 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
556 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
557 inlineImage->addStream(start, (size_t)length);
558 } else {
559 // TODO(edisonn): report error in inline image stream (ID-EI) section
560 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
561 return end;
562 }
563 return endEI;
564}
565
edisonn@google.com951d6532013-07-10 23:17:31 +0000566static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000567 SkPdfObject::makeEmptyDictionary(dict);
568
569 start = skipPdfWhiteSpaces(start, end);
570
571 while (start < end && *start == kNamed_PdfDelimiter) {
572 SkPdfObject key;
573 *start = '\0';
574 start++;
575 start = readName(start, end, &key);
576 start = skipPdfWhiteSpaces(start, end);
577
578 if (start < end) {
579 SkPdfObject* value = allocator->allocObject();
edisonn@google.com951d6532013-07-10 23:17:31 +0000580 start = nextObject(start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000581
582 start = skipPdfWhiteSpaces(start, end);
583
584 if (start < end) {
585 // seems we have an indirect reference
586 if (isPdfDigit(*start)) {
587 SkPdfObject generation;
edisonn@google.com951d6532013-07-10 23:17:31 +0000588 start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000589
590 SkPdfObject keywordR;
edisonn@google.com951d6532013-07-10 23:17:31 +0000591 start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000592
593 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
594 int64_t id = value->intValue();
595 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000596 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000597 dict->set(&key, value);
598 } else {
599 // error, ignore
600 dict->set(&key, value);
601 }
602 } else {
603 // next elem is not a digit, but it might not be / either!
604 dict->set(&key, value);
605 }
606 } else {
607 // /key >>
608 dict->set(&key, value);
609 return end;
610 }
611 start = skipPdfWhiteSpaces(start, end);
612 } else {
613 dict->set(&key, &SkPdfObject::kNull);
614 return end;
615 }
616 }
617
618 // TODO(edisonn): options to ignore these errors
619
620 // now we should expect >>
621 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000622 if (*start != kClosedInequityBracket_PdfDelimiter) {
623 // TODO(edisonn): report/warning
624 }
625 *start = '\0';
626 start++; // skip >
627 if (*start != kClosedInequityBracket_PdfDelimiter) {
628 // TODO(edisonn): report/warning
629 }
630 *start = '\0';
631 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000632
edisonn@google.com951d6532013-07-10 23:17:31 +0000633 start = readStream(start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000634
635 return start;
636}
637
edisonn@google.com951d6532013-07-10 23:17:31 +0000638unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000639 unsigned char* current;
640
641 // skip white spaces
642 start = skipPdfWhiteSpaces(start, end);
643
644 current = endOfPdfToken(start, end);
645
646 // no token, len would be 0
647 if (current == start) {
648 return NULL;
649 }
650
651 int tokenLen = current - start;
652
653 if (tokenLen == 1) {
654 // start array
655 switch (*start) {
656 case kOpenedSquareBracket_PdfDelimiter:
657 *start = '\0';
658 SkPdfObject::makeEmptyArray(token);
edisonn@google.com951d6532013-07-10 23:17:31 +0000659 return readArray(current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000660
661 case kOpenedRoundBracket_PdfDelimiter:
662 *start = '\0';
663 return readString(start, end, token);
664
665 case kOpenedInequityBracket_PdfDelimiter:
666 *start = '\0';
667 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com78b38b12013-07-15 18:20:58 +0000668 start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000669 // TODO(edisonn): pass here the length somehow?
edisonn@google.com951d6532013-07-10 23:17:31 +0000670 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000671 } else {
672 return readHexString(start + 1, end, token); // skip <
673 }
674
675 case kNamed_PdfDelimiter:
676 *start = '\0';
677 return readName(start + 1, end, token);
678
679 // TODO(edisonn): what to do curly brackets? read spec!
680 case kOpenedCurlyBracket_PdfDelimiter:
681 default:
682 break;
683 }
684
685 SkASSERT(!isPdfWhiteSpace(*start));
686 if (isPdfDelimiter(*start)) {
687 // TODO(edisonn): how stream ] } > ) will be handled?
688 // for now ignore, and it will become a keyword to be ignored
689 }
690 }
691
692 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
693 SkPdfObject::makeNull(token);
694 return current;
695 }
696
697 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
698 SkPdfObject::makeBoolean(true, token);
699 return current;
700 }
701
702 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
703 SkPdfObject::makeBoolean(false, token);
704 return current;
705 }
706
707 if (isPdfNumeric(*start)) {
708 SkPdfObject::makeNumeric(start, current, token);
709 } else {
710 SkPdfObject::makeKeyword(start, current, token);
711 }
712 return current;
713}
714
715SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000716 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000717 return new SkPdfObject[BUFFER_SIZE];
718}
719
720SkPdfAllocator::~SkPdfAllocator() {
721 for (int i = 0 ; i < fHandles.count(); i++) {
722 free(fHandles[i]);
723 }
724 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000725 for (int j = 0 ; j < BUFFER_SIZE; j++) {
726 fHistory[i][j].reset();
727 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000728 delete[] fHistory[i];
729 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000730 for (int j = 0 ; j < BUFFER_SIZE; j++) {
731 fCurrent[j].reset();
732 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000733 delete[] fCurrent;
734}
735
736SkPdfObject* SkPdfAllocator::allocObject() {
737 if (fCurrentUsed >= BUFFER_SIZE) {
738 fHistory.push(fCurrent);
739 fCurrent = allocBlock();
740 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000741 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000742 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743 fCurrentUsed++;
744 return &fCurrent[fCurrentUsed - 1];
745}
746
// TODO(edisonn): perf: do not copy the buffers; use them in place and cache the parsed result, so that there is no need for a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000748SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000749 unsigned char* buffer = NULL;
750 size_t len = 0;
751 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);
edisonn@google.com222382b2013-07-10 22:33:10 +0000752 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000753 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000754 if (endobj) {
755 len = endobj - (char*)buffer + strlen("endobj");
756 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000757 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
758 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000759 memcpy(fUncompressedStream, buffer, len);
760}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000761
edisonn@google.com951d6532013-07-10 23:17:31 +0000762SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000763 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000764 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000765 if (endobj) {
766 len = endobj - (char*)buffer + strlen("endobj");
767 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000768 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
769 fUncompressedStreamEnd = fUncompressedStream + len;
770 memcpy(fUncompressedStream, buffer, len);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000771}
772
// Intentionally empty: the destructor releases nothing itself. The working copy of the stream
// was obtained from fAllocator->alloc() in the constructors.
// NOTE(review): presumably that memory is released by the allocator - confirm.
SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
}
775
776bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
777 token->fKeyword = NULL;
778 token->fObject = NULL;
779
780 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
781 if (fUncompressedStream >= fUncompressedStreamEnd) {
782 return false;
783 }
784
785 SkPdfObject obj;
edisonn@google.com951d6532013-07-10 23:17:31 +0000786 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000787
788 // If it is a keyword, we will only get the pointer of the string
789 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
790 token->fKeyword = obj.c_str();
791 token->fKeywordLength = obj.len();
792 token->fType = kKeyword_TokenType;
793 } else {
794 SkPdfObject* pobj = fAllocator->allocObject();
795 *pobj = obj;
796 token->fObject = pobj;
797 token->fType = kObject_TokenType;
798 }
799
800#ifdef PDF_TRACE
801 static int read_op = 0;
802 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000803 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000804 printf("break;\n");
805 }
806 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
807#endif
808
809 return true;
810}
811
812void SkPdfNativeTokenizer::PutBack(PdfToken token) {
813 SkASSERT(!fHasPutBack);
814 fHasPutBack = true;
815 fPutBack = token;
816#ifdef PDF_TRACE
817 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
818#endif
819}
820
821bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
822 if (fHasPutBack) {
823 *token = fPutBack;
824 fHasPutBack = false;
825#ifdef PDF_TRACE
826 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
827#endif
828 return true;
829 }
830
831 if (fEmpty) {
832#ifdef PDF_TRACE
833 printf("EMPTY TOKENIZER\n");
834#endif
835 return false;
836 }
837
838 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000839}
edisonn@google.com78b38b12013-07-15 18:20:58 +0000840
// Defines a namespace-scope SkPdfName object called |longName|, constructed from the string
// "longName" (the stringified identifier). These objects are what the abbreviation-expanding
// functions of this file return pointers to.
#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)

// keys: full names of the inline image dictionary keys
DECLARE_PDF_NAME(BitsPerComponent);
DECLARE_PDF_NAME(ColorSpace);
DECLARE_PDF_NAME(Decode);
DECLARE_PDF_NAME(DecodeParms);
DECLARE_PDF_NAME(Filter);
DECLARE_PDF_NAME(Height);
DECLARE_PDF_NAME(ImageMask);
DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abbreviations?
DECLARE_PDF_NAME(Interpolate);
DECLARE_PDF_NAME(Width);

// values: full names of the color spaces and filters that have an abbreviated form
DECLARE_PDF_NAME(DeviceGray);
DECLARE_PDF_NAME(DeviceRGB);
DECLARE_PDF_NAME(DeviceCMYK);
DECLARE_PDF_NAME(Indexed);
DECLARE_PDF_NAME(ASCIIHexDecode);
DECLARE_PDF_NAME(ASCII85Decode);
DECLARE_PDF_NAME(LZWDecode);
DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
DECLARE_PDF_NAME(RunLengthDecode);
DECLARE_PDF_NAME(CCITTFaxDecode);
DECLARE_PDF_NAME(DCTDecode);

// Expands to an if statement that makes the ENCLOSING function return the address of the
// |longName| object when |obj| is the name "shortName" (the stringified identifier).
// Note the hidden return: it may only be used in functions whose return type accepts
// &longName.
#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
869
870
871static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
872 if (!key || !key->isName()) {
873 return key;
874 }
875
876 // TODO(edisonn): use autogenerated code!
877 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
878 HANDLE_NAME_ABBR(key, ColorSpace, CS);
879 HANDLE_NAME_ABBR(key, Decode, D);
880 HANDLE_NAME_ABBR(key, DecodeParms, DP);
881 HANDLE_NAME_ABBR(key, Filter, F);
882 HANDLE_NAME_ABBR(key, Height, H);
883 HANDLE_NAME_ABBR(key, ImageMask, IM);
884// HANDLE_NAME_ABBR(key, Intent, );
885 HANDLE_NAME_ABBR(key, Interpolate, I);
886 HANDLE_NAME_ABBR(key, Width, W);
887
888 return key;
889}
890
891static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
892 if (!value || !value->isName()) {
893 return value;
894 }
895
896 // TODO(edisonn): use autogenerated code!
897 HANDLE_NAME_ABBR(value, DeviceGray, G);
898 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
899 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
900 HANDLE_NAME_ABBR(value, Indexed, I);
901 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
902 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
903 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
904 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
905 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
906 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
907 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
908
909 return value;
910}
911
912SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
913 // BI already processed
914 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
915 if (fUncompressedStream >= fUncompressedStreamEnd) {
916 return NULL;
917 }
918
919 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
920 SkPdfObject::makeEmptyDictionary(inlineImage);
921
922 while (fUncompressedStream < fUncompressedStreamEnd) {
923 SkPdfObject* key = fAllocator->allocObject();
924 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
925
926 if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
927 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
928 return inlineImage;
929 } else {
930 SkPdfObject* obj = fAllocator->allocObject();
931 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
932 // TODO(edisonn): perf maybe we should not expand abreviation like this
933 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
934 inlineImageValueAbbreviationExpand(obj));
935 }
936 }
937 // TODO(edisonn): report end of data with inline image without an EI
938 return inlineImage;
939}