blob: 41bd92d17080423157e5c92fbcc8033521429e90 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com78b38b12013-07-15 18:20:58 +00007#include "SkPdfImageDictionary_autogen.h"
8
9// TODO(edisonn): perf!!!
10// there could be 0s between start and end! but not in the needle.
11static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
12 int needleLen = strlen(needle);
13 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
14 strncmp(hayStart, needle, needleLen) == 0) {
15 return hayStart;
16 }
17
18 hayStart++;
19
20 while (hayStart < hayEnd) {
21 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
22 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26 hayStart++;
27 }
28 return NULL;
29}
30
#ifdef PDF_TRACE
// Starts a new trace line: prints a running event id, the event type and then
// `level` spaces of indentation.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
    // Debugging hook: a convenient spot for a breakpoint on one specific event id.
    if (id == 478613) {
        printf("break;\n");
    }
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    int remaining = level;
    while (remaining-- > 0) {
        printf(" ");
    }
}

// Prints the bytes in [start, end) followed by a newline.
static void tracePrintRange(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Echoes one byte of skipped white space/comment.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Echoes one byte of the token being scanned.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Dumps a decoded name.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    tracePrintRange(start, end);
}

// Dumps a decoded literal string.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    tracePrintRange(start, end);
}

// Dumps a decoded hex string.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    tracePrintRange(start, end);
}

#else
// Tracing disabled: every hook expands to nothing.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
85
86static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
87 TRACE_INDENT(level, "White Space");
edisonn@google.com571c70b2013-07-10 17:09:50 +000088 while (start < end && isPdfWhiteSpace(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000089 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000090 if (*start == kComment_PdfDelimiter) {
91 // skip the comment until end of line
92 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000093 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000094 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000095 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000096 }
97 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 start++;
100 }
101 }
102 return start;
103}
104
105// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000106static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 //int opened brackets
108 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110
111 SkASSERT(!isPdfWhiteSpace(*start));
112
113 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000114 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115 start++;
116 return start;
117 }
118
119 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121 start++;
122 }
123 return start;
124}
125
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126// last elem has to be ]
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000127static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000128 if (allocator == NULL) {
129 // TODO(edisonn): report/warning error
130 return end;
131 }
132
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000133 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134 while (start < end) {
135 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000136 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000137
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000138 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139
140 if (endOfToken == start) {
141 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
142 return start;
143 }
144
145 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
146 return endOfToken;
147 }
148
149 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000150 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000151 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
152 // we are sure they are not references!
153 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
154 SkPdfObject* gen = array->removeLastInArray();
155 SkPdfObject* id = array->removeLastInArray();
156 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000157 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000158 }
159 array->appendInArray(newObj);
160 }
edisonn@google.com78b38b12013-07-15 18:20:58 +0000161 printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com571c70b2013-07-10 17:09:50 +0000162 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000163 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
164 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000165 return start;
166}
167
168// When we read strings we will rewrite the string so we will reuse the memory
169// when we start to read the string, we already consumed the opened bracket
edisonn@google.com571c70b2013-07-10 17:09:50 +0000170
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000171// TODO(edisonn): space: add paramater, taht would report if we need to allocate new buffer, or we can reuse the one we have
172
173static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
174 TRACE_INDENT(level, "String");
175 const unsigned char* in = start;
176 bool hasOut = (out != NULL);
177
178 int openRoundBrackets = 1;
179 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000180 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
181 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000182 if (openRoundBrackets == 0) {
183 in++; // consumed )
184 break;
185 }
186
edisonn@google.com571c70b2013-07-10 17:09:50 +0000187 if (*in == kEscape_PdfSpecial) {
188 if (in + 1 < end) {
189 switch (in[1]) {
190 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000191 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000192 out++;
193 in += 2;
194 break;
195
196 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000197 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000198 out++;
199 in += 2;
200 break;
201
202 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000203 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000204 out++;
205 in += 2;
206 break;
207
208 case 'b':
209 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000210 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000211 out++;
212 in += 2;
213 break;
214
215 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000216 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000217 out++;
218 in += 2;
219 break;
220
221 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000222 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000223 out++;
224 in += 2;
225 break;
226
227 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000228 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000229 out++;
230 in += 2;
231 break;
232
233 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000234 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000235 out++;
236 in += 2;
237 break;
238
239 case '0':
240 case '1':
241 case '2':
242 case '3':
243 case '4':
244 case '5':
245 case '6':
246 case '7': {
247 //read octals
248 in++; // consume backslash
249
250 int code = 0;
251 int i = 0;
252 while (in < end && *in >= '0' && *in < '8') {
253 code = (code << 3) + ((*in) - '0'); // code * 8 + d
254 i++;
255 in++;
256 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000257 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000258 out++;
259 i = 0;
260 }
261 }
262 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000263 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000264 out++;
265 }
266 }
267 break;
268
269 default:
270 // Per spec, backslash is ignored is escaped ch is unknown
271 in++;
272 break;
273 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000274 } else {
275 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000276 }
277 } else {
278 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
279 // we could have one look that first just inc current, and when we find the backslash
280 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000281 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000282 in++;
283 out++;
284 }
285 }
286
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000287 if (hasOut) {
288 return in; // consumed already ) at the end of the string
289 } else {
290 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
291 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000292}
293
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000294static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
295 return readString(level, start, end, NULL) - start;
296}
297
298static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000299 if (!allocator) {
300 return end;
301 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000302 int outLength = readStringLength(level, start, end);
303 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
304 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
305 start = readString(level, start, end, out);
306 SkPdfObject::makeString(out, out + outLength, str);
307 TRACE_STRING(out, out + outLength);
308 return start; // consumed already ) at the end of the string
309}
310
311static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
312 TRACE_INDENT(level, "HexString");
313 bool hasOut = (out != NULL);
314 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000315
316 unsigned char code = 0;
317
318 while (in < end) {
319 while (in < end && isPdfWhiteSpace(*in)) {
320 in++;
321 }
322
323 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000324 //*in = '\0';
325 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000326 // normal exit
327 break;
328 }
329
330 if (in >= end) {
331 // end too soon
332 break;
333 }
334
335 switch (*in) {
336 case '0':
337 case '1':
338 case '2':
339 case '3':
340 case '4':
341 case '5':
342 case '6':
343 case '7':
344 case '8':
345 case '9':
346 code = (*in - '0') << 4;
347 break;
348
349 case 'a':
350 case 'b':
351 case 'c':
352 case 'd':
353 case 'e':
354 case 'f':
355 code = (*in - 'a' + 10) << 4;
356 break;
357
358 case 'A':
359 case 'B':
360 case 'C':
361 case 'D':
362 case 'E':
363 case 'F':
364 code = (*in - 'A' + 10) << 4;
365 break;
366
367 // TODO(edisonn): spec does not say how to handle this error
368 default:
369 break;
370 }
371
372 in++; // advance
373
374 while (in < end && isPdfWhiteSpace(*in)) {
375 in++;
376 }
377
378 // TODO(edisonn): report error
379 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000380 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000381 out++;
382 break;
383 }
384
385 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000386 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000387 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000388 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000389 break;
390 }
391
392 switch (*in) {
393 case '0':
394 case '1':
395 case '2':
396 case '3':
397 case '4':
398 case '5':
399 case '6':
400 case '7':
401 case '8':
402 case '9':
403 code += (*in - '0');
404 break;
405
406 case 'a':
407 case 'b':
408 case 'c':
409 case 'd':
410 case 'e':
411 case 'f':
412 code += (*in - 'a' + 10);
413 break;
414
415 case 'A':
416 case 'B':
417 case 'C':
418 case 'D':
419 case 'E':
420 case 'F':
421 code += (*in - 'A' + 10);
422 break;
423
424 // TODO(edisonn): spec does not say how to handle this error
425 default:
426 break;
427 }
428
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000429 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000430 out++;
431 in++;
432 }
433
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000434 if (hasOut) {
435 return in; // consumed already > at the end of the string
436 } else {
437 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000438 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000439}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000440
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000441static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
442 return readHexString(level, start, end, NULL) - start;
443}
444
445static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000446 if (!allocator) {
447 return end;
448 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000449 int outLength = readHexStringLength(level, start, end);
450 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
451 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
452 start = readHexString(level, start, end, out);
453 SkPdfObject::makeHexString(out, out + outLength, str);
454 TRACE_HEXSTRING(out, out + outLength);
455 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000456}
457
458// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000459static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
460 TRACE_INDENT(level, "Name");
461 bool hasOut = (out != NULL);
462 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000463
464 unsigned char code = 0;
465
466 while (in < end) {
467 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
468 break;
469 }
470
471 if (*in == '#' && in + 2 < end) {
472 in++;
473 switch (*in) {
474 case '0':
475 case '1':
476 case '2':
477 case '3':
478 case '4':
479 case '5':
480 case '6':
481 case '7':
482 case '8':
483 case '9':
484 code = (*in - '0') << 4;
485 break;
486
487 case 'a':
488 case 'b':
489 case 'c':
490 case 'd':
491 case 'e':
492 case 'f':
493 code = (*in - 'a' + 10) << 4;
494 break;
495
496 case 'A':
497 case 'B':
498 case 'C':
499 case 'D':
500 case 'E':
501 case 'F':
502 code = (*in - 'A' + 10) << 4;
503 break;
504
505 // TODO(edisonn): spec does not say how to handle this error
506 default:
507 break;
508 }
509
510 in++; // advance
511
512 switch (*in) {
513 case '0':
514 case '1':
515 case '2':
516 case '3':
517 case '4':
518 case '5':
519 case '6':
520 case '7':
521 case '8':
522 case '9':
523 code += (*in - '0');
524 break;
525
526 case 'a':
527 case 'b':
528 case 'c':
529 case 'd':
530 case 'e':
531 case 'f':
532 code += (*in - 'a' + 10);
533 break;
534
535 case 'A':
536 case 'B':
537 case 'C':
538 case 'D':
539 case 'E':
540 case 'F':
541 code += (*in - 'A' + 10);
542 break;
543
544 // TODO(edisonn): spec does not say how to handle this error
545 default:
546 break;
547 }
548
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000549 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000550 out++;
551 in++;
552 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000553 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000554 out++;
555 in++;
556 }
557 }
558
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000559 if (hasOut) {
560 return in;
561 } else {
562 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
563 }
564}
565
566static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
567 return readName(level, start, end, NULL) - start;
568}
569
570static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000571 if (!allocator) {
572 return end;
573 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000574 int outLength = readNameLength(level, start, end);
575 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
576 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
577 start = readName(level, start, end, out);
578 SkPdfObject::makeName(out, out + outLength, name);
579 TRACE_NAME(out, out + outLength);
580 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000581}
582
583// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
584// that makes for an interesting scenario, where the stream itself contains endstream, together
585// with a reference object with the length, but the real length object would be somewhere else
586// it could confuse the parser
587/*example:
588
5897 0 obj
590<< /length 8 0 R>>
591stream
592...............
593endstream
5948 0 obj #we are in stream actually, not a real object
595<< 10 >> #we are in stream actually, not a real object
596endobj
597endstream
5988 0 obj #real obj
599<< 100 >> #real obj
600endobj
601and it could get worse, with multiple object like this
602*/
603
604// right now implement the silly algorithm that assumes endstream is finishing the stream
605
606
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000607static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
608 TRACE_INDENT(level, "Stream");
609 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000610 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
611 // no stream. return.
612 return start;
613 }
614
615 start += 6; // strlen("stream")
616 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
617 start += 2;
618 } else if (start[0] == kLF_PdfWhiteSpace) {
619 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000620 } else if (isPdfWhiteSpace(start[0])) {
621 start += 1;
622 } else {
623 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
624 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000625 }
626
627 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
628 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000629 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000630
631 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000632 if (stream->has_Length() && stream->Length(doc) > 0) {
633 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000634 }
635
636 // TODO(edisonn): laod external streams
637 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
638
639 if (length < 0) {
640 // scan the buffer, until we find first endstream
641 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000642 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000643
644 if (endstream) {
645 length = endstream - start;
646 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000647 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000648 }
649 }
650 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000651 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000652
653 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
654 endstream += 2;
655 } else if (endstream[0] == kLF_PdfWhiteSpace) {
656 endstream += 1;
657 }
658
659 // TODO(edisonn): verify the next bytes are "endstream"
660
661 endstream += strlen("endstream");
662 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000663 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000664 return endstream;
665 }
666 return start;
667}
668
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000669static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
670 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000671 // We already processed ID keyword, and we should be positioned immediately after it
672
673 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
674 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
675 start += 2;
676 } else if (start[0] == kLF_PdfWhiteSpace) {
677 start += 1;
678 } else if (isPdfWhiteSpace(start[0])) {
679 start += 1;
680 } else {
681 SkASSERT(isPdfDelimiter(start[0]));
682 // TODO(edisonn): warning?
683 }
684
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000685 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
686 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000687
688 if (endstream) {
689 int length = endstream - start;
690 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
691 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
692 inlineImage->addStream(start, (size_t)length);
693 } else {
694 // TODO(edisonn): report error in inline image stream (ID-EI) section
695 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
696 return end;
697 }
698 return endEI;
699}
700
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000701static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000702 if (allocator == NULL) {
703 // TODO(edisonn): report/warning error
704 return end;
705 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000706 TRACE_INDENT(level, "Dictionary");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000707 SkPdfObject::makeEmptyDictionary(dict);
708
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000709 start = skipPdfWhiteSpaces(level, start, end);
710 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000711
712 while (start < end && *start == kNamed_PdfDelimiter) {
713 SkPdfObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000714 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000715 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000716 start = readName(level + 1, start, end, &key, &tmpStorage);
717 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000718
719 if (start < end) {
720 SkPdfObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000721 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000722
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000723 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000724
725 if (start < end) {
726 // seems we have an indirect reference
727 if (isPdfDigit(*start)) {
728 SkPdfObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000729 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000730
731 SkPdfObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000732 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000733
734 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
735 int64_t id = value->intValue();
736 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000737 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000738 dict->set(&key, value);
739 } else {
740 // error, ignore
741 dict->set(&key, value);
742 }
743 } else {
744 // next elem is not a digit, but it might not be / either!
745 dict->set(&key, value);
746 }
747 } else {
748 // /key >>
749 dict->set(&key, value);
750 return end;
751 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000752 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000753 } else {
754 dict->set(&key, &SkPdfObject::kNull);
755 return end;
756 }
757 }
758
759 // TODO(edisonn): options to ignore these errors
760
761 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000762 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000763 if (*start != kClosedInequityBracket_PdfDelimiter) {
764 // TODO(edisonn): report/warning
765 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000766 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000767 start++; // skip >
768 if (*start != kClosedInequityBracket_PdfDelimiter) {
769 // TODO(edisonn): report/warning
770 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000771 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000772 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000773
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000774 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000775
776 return start;
777}
778
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000779const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
780 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000781
782 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000783 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000784
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000785 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000786
787 // no token, len would be 0
788 if (current == start) {
789 return NULL;
790 }
791
792 int tokenLen = current - start;
793
794 if (tokenLen == 1) {
795 // start array
796 switch (*start) {
797 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000798 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000799 SkPdfObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000800 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000801
802 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000803 //*start = '\0';
804 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000805
806 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000807 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000808 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000809 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000810 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000811 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000812 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000813 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000814 }
815
816 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000817 //*start = '\0';
818 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000819
820 // TODO(edisonn): what to do curly brackets? read spec!
821 case kOpenedCurlyBracket_PdfDelimiter:
822 default:
823 break;
824 }
825
826 SkASSERT(!isPdfWhiteSpace(*start));
827 if (isPdfDelimiter(*start)) {
828 // TODO(edisonn): how stream ] } > ) will be handled?
829 // for now ignore, and it will become a keyword to be ignored
830 }
831 }
832
833 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
834 SkPdfObject::makeNull(token);
835 return current;
836 }
837
838 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
839 SkPdfObject::makeBoolean(true, token);
840 return current;
841 }
842
843 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
844 SkPdfObject::makeBoolean(false, token);
845 return current;
846 }
847
848 if (isPdfNumeric(*start)) {
849 SkPdfObject::makeNumeric(start, current, token);
850 } else {
851 SkPdfObject::makeKeyword(start, current, token);
852 }
853 return current;
854}
855
856SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000857 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000858 return new SkPdfObject[BUFFER_SIZE];
859}
860
861SkPdfAllocator::~SkPdfAllocator() {
862 for (int i = 0 ; i < fHandles.count(); i++) {
863 free(fHandles[i]);
864 }
865 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000866 for (int j = 0 ; j < BUFFER_SIZE; j++) {
867 fHistory[i][j].reset();
868 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000869 delete[] fHistory[i];
870 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000871 for (int j = 0 ; j < BUFFER_SIZE; j++) {
872 fCurrent[j].reset();
873 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000874 delete[] fCurrent;
875}
876
877SkPdfObject* SkPdfAllocator::allocObject() {
878 if (fCurrentUsed >= BUFFER_SIZE) {
879 fHistory.push(fCurrent);
880 fCurrent = allocBlock();
881 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000882 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000883 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000884 fCurrentUsed++;
885 return &fCurrent[fCurrentUsed - 1];
886}
887
888// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000889SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000890 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000891 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000892 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000893 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000894 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000895 if (endobj) {
896 len = endobj - (char*)buffer + strlen("endobj");
897 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000898 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000899 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000900}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000901
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000902SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000903 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000904 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000905 if (endobj) {
906 len = endobj - (char*)buffer + strlen("endobj");
907 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000908 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000909 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000910}
911
912SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000913}
914
915bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
916 token->fKeyword = NULL;
917 token->fObject = NULL;
918
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000919 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000920 if (fUncompressedStream >= fUncompressedStreamEnd) {
921 return false;
922 }
923
924 SkPdfObject obj;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000925 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000926
927 // If it is a keyword, we will only get the pointer of the string
928 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
929 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000930 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000931 token->fType = kKeyword_TokenType;
932 } else {
933 SkPdfObject* pobj = fAllocator->allocObject();
934 *pobj = obj;
935 token->fObject = pobj;
936 token->fType = kObject_TokenType;
937 }
938
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000939#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000940 static int read_op = 0;
941 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000942 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000943 printf("break;\n");
944 }
945 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
946#endif
947
948 return true;
949}
950
951void SkPdfNativeTokenizer::PutBack(PdfToken token) {
952 SkASSERT(!fHasPutBack);
953 fHasPutBack = true;
954 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000955#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000956 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
957#endif
958}
959
960bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
961 if (fHasPutBack) {
962 *token = fPutBack;
963 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000964#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000965 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
966#endif
967 return true;
968 }
969
970 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000971#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000972 printf("EMPTY TOKENIZER\n");
973#endif
974 return false;
975 }
976
977 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000978}
edisonn@google.com78b38b12013-07-15 18:20:58 +0000979
980#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
981
982// keys
983DECLARE_PDF_NAME(BitsPerComponent);
984DECLARE_PDF_NAME(ColorSpace);
985DECLARE_PDF_NAME(Decode);
986DECLARE_PDF_NAME(DecodeParms);
987DECLARE_PDF_NAME(Filter);
988DECLARE_PDF_NAME(Height);
989DECLARE_PDF_NAME(ImageMask);
990DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
991DECLARE_PDF_NAME(Interpolate);
992DECLARE_PDF_NAME(Width);
993
994// values
995DECLARE_PDF_NAME(DeviceGray);
996DECLARE_PDF_NAME(DeviceRGB);
997DECLARE_PDF_NAME(DeviceCMYK);
998DECLARE_PDF_NAME(Indexed);
999DECLARE_PDF_NAME(ASCIIHexDecode);
1000DECLARE_PDF_NAME(ASCII85Decode);
1001DECLARE_PDF_NAME(LZWDecode);
1002DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1003DECLARE_PDF_NAME(RunLengthDecode);
1004DECLARE_PDF_NAME(CCITTFaxDecode);
1005DECLARE_PDF_NAME(DCTDecode);
1006
1007#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1008
1009
1010static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
1011 if (!key || !key->isName()) {
1012 return key;
1013 }
1014
1015 // TODO(edisonn): use autogenerated code!
1016 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1017 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1018 HANDLE_NAME_ABBR(key, Decode, D);
1019 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1020 HANDLE_NAME_ABBR(key, Filter, F);
1021 HANDLE_NAME_ABBR(key, Height, H);
1022 HANDLE_NAME_ABBR(key, ImageMask, IM);
1023// HANDLE_NAME_ABBR(key, Intent, );
1024 HANDLE_NAME_ABBR(key, Interpolate, I);
1025 HANDLE_NAME_ABBR(key, Width, W);
1026
1027 return key;
1028}
1029
1030static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
1031 if (!value || !value->isName()) {
1032 return value;
1033 }
1034
1035 // TODO(edisonn): use autogenerated code!
1036 HANDLE_NAME_ABBR(value, DeviceGray, G);
1037 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1038 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1039 HANDLE_NAME_ABBR(value, Indexed, I);
1040 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1041 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1042 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1043 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1044 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1045 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1046 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1047
1048 return value;
1049}
1050
1051SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1052 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001053 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001054 if (fUncompressedStream >= fUncompressedStreamEnd) {
1055 return NULL;
1056 }
1057
1058 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
1059 SkPdfObject::makeEmptyDictionary(inlineImage);
1060
1061 while (fUncompressedStream < fUncompressedStreamEnd) {
1062 SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001063 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001064
edisonn@google.come878e722013-07-29 19:10:58 +00001065 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001066 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001067 return inlineImage;
1068 } else {
1069 SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001070 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001071 // TODO(edisonn): perf maybe we should not expand abreviation like this
1072 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1073 inlineImageValueAbbreviationExpand(obj));
1074 }
1075 }
1076 // TODO(edisonn): report end of data with inline image without an EI
1077 return inlineImage;
1078}