blob: 7d8bcb695a645a34e1db60458920631cc969dea7 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com78b38b12013-07-15 18:20:58 +00007#include "SkPdfImageDictionary_autogen.h"
8
9// TODO(edisonn): perf!!!
10// there could be 0s between start and end! but not in the needle.
11static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
12 int needleLen = strlen(needle);
13 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
14 strncmp(hayStart, needle, needleLen) == 0) {
15 return hayStart;
16 }
17
18 hayStart++;
19
20 while (hayStart < hayEnd) {
21 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
22 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26 hayStart++;
27 }
28 return NULL;
29}
30
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000031#ifdef PDF_TRACE
// Begins a new trace line: prints a running counter and the element type, then pads the
// line with one space per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
    // Debugging hook: a convenient spot for a breakpoint on one specific trace id.
    if (id == 478613) {
        printf("break;\n");
    }
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    int remaining = level;
    while (remaining-- > 0) {
        printf(" ");
    }
}
edisonn@google.com3aac1f92013-07-02 22:42:53 +000044
// Echoes one byte of skipped white space/comment to the trace output.
static void TRACE_COMMENT(char ch) {
    const char traced = ch;
    printf("%c", traced);
}
48
// Echoes one byte of the token being scanned to the trace output.
static void TRACE_TK(char ch) {
    const char traced = ch;
    printf("%c", traced);
}
52
// Prints the bytes of a decoded name, [start, end), followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
60
// Prints the bytes of a decoded literal string, [start, end), followed by a newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
68
// Prints the bytes of a decoded hex string, [start, end), followed by a newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
76
#else
// PDF_TRACE is not defined: every trace helper expands to nothing, so the calls
// sprinkled through the tokenizer leave only an empty statement behind.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
85
86static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
87 TRACE_INDENT(level, "White Space");
edisonn@google.com571c70b2013-07-10 17:09:50 +000088 while (start < end && isPdfWhiteSpace(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000089 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000090 if (*start == kComment_PdfDelimiter) {
91 // skip the comment until end of line
92 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000093 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000094 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000095 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000096 }
97 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 start++;
100 }
101 }
102 return start;
103}
104
105// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000106static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 //int opened brackets
108 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110
111 SkASSERT(!isPdfWhiteSpace(*start));
112
113 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000114 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115 start++;
116 return start;
117 }
118
119 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121 start++;
122 }
123 return start;
124}
125
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126// last elem has to be ]
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000127static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000128 if (allocator == NULL) {
129 // TODO(edisonn): report/warning error
130 return end;
131 }
132
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000133 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134 while (start < end) {
135 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000136 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000137
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000138 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139
140 if (endOfToken == start) {
141 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
142 return start;
143 }
144
145 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
146 return endOfToken;
147 }
148
149 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000150 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000151 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
152 // we are sure they are not references!
153 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
154 SkPdfObject* gen = array->removeLastInArray();
155 SkPdfObject* id = array->removeLastInArray();
156 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000157 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000158 }
159 array->appendInArray(newObj);
160 }
edisonn@google.com78b38b12013-07-15 18:20:58 +0000161 printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com571c70b2013-07-10 17:09:50 +0000162 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000163 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
164 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000165 return start;
166}
167
// When we read strings we will rewrite the string, so we will reuse the memory.
// When we start to read the string, we have already consumed the opening bracket.

// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer, or can reuse the one we have
172
173static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
174 TRACE_INDENT(level, "String");
175 const unsigned char* in = start;
176 bool hasOut = (out != NULL);
177
178 int openRoundBrackets = 1;
179 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000180 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
181 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000182 if (openRoundBrackets == 0) {
183 in++; // consumed )
184 break;
185 }
186
edisonn@google.com571c70b2013-07-10 17:09:50 +0000187 if (*in == kEscape_PdfSpecial) {
188 if (in + 1 < end) {
189 switch (in[1]) {
190 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000191 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000192 out++;
193 in += 2;
194 break;
195
196 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000197 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000198 out++;
199 in += 2;
200 break;
201
202 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000203 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000204 out++;
205 in += 2;
206 break;
207
208 case 'b':
209 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000210 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000211 out++;
212 in += 2;
213 break;
214
215 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000216 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000217 out++;
218 in += 2;
219 break;
220
221 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000222 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000223 out++;
224 in += 2;
225 break;
226
227 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000228 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000229 out++;
230 in += 2;
231 break;
232
233 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000234 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000235 out++;
236 in += 2;
237 break;
238
239 case '0':
240 case '1':
241 case '2':
242 case '3':
243 case '4':
244 case '5':
245 case '6':
246 case '7': {
247 //read octals
248 in++; // consume backslash
249
250 int code = 0;
251 int i = 0;
252 while (in < end && *in >= '0' && *in < '8') {
253 code = (code << 3) + ((*in) - '0'); // code * 8 + d
254 i++;
255 in++;
256 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000257 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000258 out++;
259 i = 0;
260 }
261 }
262 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000263 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000264 out++;
265 }
266 }
267 break;
268
269 default:
270 // Per spec, backslash is ignored is escaped ch is unknown
271 in++;
272 break;
273 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000274 } else {
275 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000276 }
277 } else {
278 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
279 // we could have one look that first just inc current, and when we find the backslash
280 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000281 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000282 in++;
283 out++;
284 }
285 }
286
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000287 if (hasOut) {
288 return in; // consumed already ) at the end of the string
289 } else {
290 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
291 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000292}
293
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000294static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
295 return readString(level, start, end, NULL) - start;
296}
297
298static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000299 if (!allocator) {
300 return end;
301 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000302 int outLength = readStringLength(level, start, end);
303 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
304 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
305 start = readString(level, start, end, out);
306 SkPdfObject::makeString(out, out + outLength, str);
307 TRACE_STRING(out, out + outLength);
308 return start; // consumed already ) at the end of the string
309}
310
311static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
312 TRACE_INDENT(level, "HexString");
313 bool hasOut = (out != NULL);
314 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000315
316 unsigned char code = 0;
317
318 while (in < end) {
319 while (in < end && isPdfWhiteSpace(*in)) {
320 in++;
321 }
322
323 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000324 //*in = '\0';
325 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000326 // normal exit
327 break;
328 }
329
330 if (in >= end) {
331 // end too soon
332 break;
333 }
334
335 switch (*in) {
336 case '0':
337 case '1':
338 case '2':
339 case '3':
340 case '4':
341 case '5':
342 case '6':
343 case '7':
344 case '8':
345 case '9':
346 code = (*in - '0') << 4;
347 break;
348
349 case 'a':
350 case 'b':
351 case 'c':
352 case 'd':
353 case 'e':
354 case 'f':
355 code = (*in - 'a' + 10) << 4;
356 break;
357
358 case 'A':
359 case 'B':
360 case 'C':
361 case 'D':
362 case 'E':
363 case 'F':
364 code = (*in - 'A' + 10) << 4;
365 break;
366
367 // TODO(edisonn): spec does not say how to handle this error
368 default:
369 break;
370 }
371
372 in++; // advance
373
374 while (in < end && isPdfWhiteSpace(*in)) {
375 in++;
376 }
377
378 // TODO(edisonn): report error
379 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000380 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000381 out++;
382 break;
383 }
384
385 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000386 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000387 out++;
388 break;
389 }
390
391 switch (*in) {
392 case '0':
393 case '1':
394 case '2':
395 case '3':
396 case '4':
397 case '5':
398 case '6':
399 case '7':
400 case '8':
401 case '9':
402 code += (*in - '0');
403 break;
404
405 case 'a':
406 case 'b':
407 case 'c':
408 case 'd':
409 case 'e':
410 case 'f':
411 code += (*in - 'a' + 10);
412 break;
413
414 case 'A':
415 case 'B':
416 case 'C':
417 case 'D':
418 case 'E':
419 case 'F':
420 code += (*in - 'A' + 10);
421 break;
422
423 // TODO(edisonn): spec does not say how to handle this error
424 default:
425 break;
426 }
427
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000428 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000429 out++;
430 in++;
431 }
432
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000433 if (hasOut) {
434 return in; // consumed already > at the end of the string
435 } else {
436 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000437 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000438}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000439
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000440static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
441 return readHexString(level, start, end, NULL) - start;
442}
443
444static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000445 if (!allocator) {
446 return end;
447 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000448 int outLength = readHexStringLength(level, start, end);
449 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
450 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
451 start = readHexString(level, start, end, out);
452 SkPdfObject::makeHexString(out, out + outLength, str);
453 TRACE_HEXSTRING(out, out + outLength);
454 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000455}
456
457// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000458static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
459 TRACE_INDENT(level, "Name");
460 bool hasOut = (out != NULL);
461 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000462
463 unsigned char code = 0;
464
465 while (in < end) {
466 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
467 break;
468 }
469
470 if (*in == '#' && in + 2 < end) {
471 in++;
472 switch (*in) {
473 case '0':
474 case '1':
475 case '2':
476 case '3':
477 case '4':
478 case '5':
479 case '6':
480 case '7':
481 case '8':
482 case '9':
483 code = (*in - '0') << 4;
484 break;
485
486 case 'a':
487 case 'b':
488 case 'c':
489 case 'd':
490 case 'e':
491 case 'f':
492 code = (*in - 'a' + 10) << 4;
493 break;
494
495 case 'A':
496 case 'B':
497 case 'C':
498 case 'D':
499 case 'E':
500 case 'F':
501 code = (*in - 'A' + 10) << 4;
502 break;
503
504 // TODO(edisonn): spec does not say how to handle this error
505 default:
506 break;
507 }
508
509 in++; // advance
510
511 switch (*in) {
512 case '0':
513 case '1':
514 case '2':
515 case '3':
516 case '4':
517 case '5':
518 case '6':
519 case '7':
520 case '8':
521 case '9':
522 code += (*in - '0');
523 break;
524
525 case 'a':
526 case 'b':
527 case 'c':
528 case 'd':
529 case 'e':
530 case 'f':
531 code += (*in - 'a' + 10);
532 break;
533
534 case 'A':
535 case 'B':
536 case 'C':
537 case 'D':
538 case 'E':
539 case 'F':
540 code += (*in - 'A' + 10);
541 break;
542
543 // TODO(edisonn): spec does not say how to handle this error
544 default:
545 break;
546 }
547
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000548 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000549 out++;
550 in++;
551 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000552 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000553 out++;
554 in++;
555 }
556 }
557
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000558 if (hasOut) {
559 return in;
560 } else {
561 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
562 }
563}
564
565static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
566 return readName(level, start, end, NULL) - start;
567}
568
569static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000570 if (!allocator) {
571 return end;
572 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000573 int outLength = readNameLength(level, start, end);
574 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
575 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
576 start = readName(level, start, end, out);
577 SkPdfObject::makeName(out, out + outLength, name);
578 TRACE_NAME(out, out + outLength);
579 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000580}
581
582// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream
583// that makes for an interesting scenario, where the stream itself contains endstream, together
584// with a reference object with the length, but the real length object would be somewhere else
585// it could confuse the parser
586/*example:
587
5887 0 obj
589<< /length 8 0 R>>
590stream
591...............
592endstream
5938 0 obj #we are in stream actually, not a real object
594<< 10 >> #we are in stream actually, not a real object
595endobj
596endstream
5978 0 obj #real obj
598<< 100 >> #real obj
599endobj
600and it could get worse, with multiple object like this
601*/
602
603// right now implement the silly algorithm that assumes endstream is finishing the stream
604
605
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000606static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
607 TRACE_INDENT(level, "Stream");
608 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000609 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
610 // no stream. return.
611 return start;
612 }
613
614 start += 6; // strlen("stream")
615 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
616 start += 2;
617 } else if (start[0] == kLF_PdfWhiteSpace) {
618 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000619 } else if (isPdfWhiteSpace(start[0])) {
620 start += 1;
621 } else {
622 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
623 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000624 }
625
626 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
627 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000628 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000629
630 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000631 if (stream->has_Length() && stream->Length(doc) > 0) {
632 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000633 }
634
635 // TODO(edisonn): laod external streams
636 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
637
638 if (length < 0) {
639 // scan the buffer, until we find first endstream
640 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000641 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000642
643 if (endstream) {
644 length = endstream - start;
645 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000646 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000647 }
648 }
649 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000650 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000651
652 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
653 endstream += 2;
654 } else if (endstream[0] == kLF_PdfWhiteSpace) {
655 endstream += 1;
656 }
657
658 // TODO(edisonn): verify the next bytes are "endstream"
659
660 endstream += strlen("endstream");
661 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000662 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000663 return endstream;
664 }
665 return start;
666}
667
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000668static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
669 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000670 // We already processed ID keyword, and we should be positioned immediately after it
671
672 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
673 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
674 start += 2;
675 } else if (start[0] == kLF_PdfWhiteSpace) {
676 start += 1;
677 } else if (isPdfWhiteSpace(start[0])) {
678 start += 1;
679 } else {
680 SkASSERT(isPdfDelimiter(start[0]));
681 // TODO(edisonn): warning?
682 }
683
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000684 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
685 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000686
687 if (endstream) {
688 int length = endstream - start;
689 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
690 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
691 inlineImage->addStream(start, (size_t)length);
692 } else {
693 // TODO(edisonn): report error in inline image stream (ID-EI) section
694 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
695 return end;
696 }
697 return endEI;
698}
699
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000700static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000701 if (allocator == NULL) {
702 // TODO(edisonn): report/warning error
703 return end;
704 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000705 TRACE_INDENT(level, "Dictionary");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000706 SkPdfObject::makeEmptyDictionary(dict);
707
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000708 start = skipPdfWhiteSpaces(level, start, end);
709 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000710
711 while (start < end && *start == kNamed_PdfDelimiter) {
712 SkPdfObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000713 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000714 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000715 start = readName(level + 1, start, end, &key, &tmpStorage);
716 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000717
718 if (start < end) {
719 SkPdfObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000720 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000721
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000722 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000723
724 if (start < end) {
725 // seems we have an indirect reference
726 if (isPdfDigit(*start)) {
727 SkPdfObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000728 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000729
730 SkPdfObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000731 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000732
733 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
734 int64_t id = value->intValue();
735 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000736 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000737 dict->set(&key, value);
738 } else {
739 // error, ignore
740 dict->set(&key, value);
741 }
742 } else {
743 // next elem is not a digit, but it might not be / either!
744 dict->set(&key, value);
745 }
746 } else {
747 // /key >>
748 dict->set(&key, value);
749 return end;
750 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000751 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000752 } else {
753 dict->set(&key, &SkPdfObject::kNull);
754 return end;
755 }
756 }
757
758 // TODO(edisonn): options to ignore these errors
759
760 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000761 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000762 if (*start != kClosedInequityBracket_PdfDelimiter) {
763 // TODO(edisonn): report/warning
764 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000765 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000766 start++; // skip >
767 if (*start != kClosedInequityBracket_PdfDelimiter) {
768 // TODO(edisonn): report/warning
769 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000770 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000771 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000772
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000773 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000774
775 return start;
776}
777
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000778const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
779 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000780
781 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000782 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000783
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000784 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000785
786 // no token, len would be 0
787 if (current == start) {
788 return NULL;
789 }
790
791 int tokenLen = current - start;
792
793 if (tokenLen == 1) {
794 // start array
795 switch (*start) {
796 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000797 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000798 SkPdfObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000799 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800
801 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000802 //*start = '\0';
803 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000804
805 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000806 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000807 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000808 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000809 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000810 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000811 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000812 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000813 }
814
815 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000816 //*start = '\0';
817 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000818
819 // TODO(edisonn): what to do curly brackets? read spec!
820 case kOpenedCurlyBracket_PdfDelimiter:
821 default:
822 break;
823 }
824
825 SkASSERT(!isPdfWhiteSpace(*start));
826 if (isPdfDelimiter(*start)) {
827 // TODO(edisonn): how stream ] } > ) will be handled?
828 // for now ignore, and it will become a keyword to be ignored
829 }
830 }
831
832 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
833 SkPdfObject::makeNull(token);
834 return current;
835 }
836
837 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
838 SkPdfObject::makeBoolean(true, token);
839 return current;
840 }
841
842 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
843 SkPdfObject::makeBoolean(false, token);
844 return current;
845 }
846
847 if (isPdfNumeric(*start)) {
848 SkPdfObject::makeNumeric(start, current, token);
849 } else {
850 SkPdfObject::makeKeyword(start, current, token);
851 }
852 return current;
853}
854
855SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000856 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000857 return new SkPdfObject[BUFFER_SIZE];
858}
859
860SkPdfAllocator::~SkPdfAllocator() {
861 for (int i = 0 ; i < fHandles.count(); i++) {
862 free(fHandles[i]);
863 }
864 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000865 for (int j = 0 ; j < BUFFER_SIZE; j++) {
866 fHistory[i][j].reset();
867 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000868 delete[] fHistory[i];
869 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000870 for (int j = 0 ; j < BUFFER_SIZE; j++) {
871 fCurrent[j].reset();
872 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000873 delete[] fCurrent;
874}
875
876SkPdfObject* SkPdfAllocator::allocObject() {
877 if (fCurrentUsed >= BUFFER_SIZE) {
878 fHistory.push(fCurrent);
879 fCurrent = allocBlock();
880 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000881 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000882 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000883 fCurrentUsed++;
884 return &fCurrent[fCurrentUsed - 1];
885}
886
887// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000888SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000889 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000890 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000891 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000892 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000893 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000894 if (endobj) {
895 len = endobj - (char*)buffer + strlen("endobj");
896 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000897 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000898 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000899}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000900
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000901SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000902 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000903 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000904 if (endobj) {
905 len = endobj - (char*)buffer + strlen("endobj");
906 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000907 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000908 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000909}
910
911SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000912}
913
914bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
915 token->fKeyword = NULL;
916 token->fObject = NULL;
917
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000918 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000919 if (fUncompressedStream >= fUncompressedStreamEnd) {
920 return false;
921 }
922
923 SkPdfObject obj;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000924 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000925
926 // If it is a keyword, we will only get the pointer of the string
927 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
928 token->fKeyword = obj.c_str();
929 token->fKeywordLength = obj.len();
930 token->fType = kKeyword_TokenType;
931 } else {
932 SkPdfObject* pobj = fAllocator->allocObject();
933 *pobj = obj;
934 token->fObject = pobj;
935 token->fType = kObject_TokenType;
936 }
937
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000938#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000939 static int read_op = 0;
940 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000941 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000942 printf("break;\n");
943 }
944 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
945#endif
946
947 return true;
948}
949
950void SkPdfNativeTokenizer::PutBack(PdfToken token) {
951 SkASSERT(!fHasPutBack);
952 fHasPutBack = true;
953 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000954#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000955 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
956#endif
957}
958
959bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
960 if (fHasPutBack) {
961 *token = fPutBack;
962 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000963#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000964 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
965#endif
966 return true;
967 }
968
969 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000970#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000971 printf("EMPTY TOKENIZER\n");
972#endif
973 return false;
974 }
975
976 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000977}
edisonn@google.com78b38b12013-07-15 18:20:58 +0000978
979#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
980
981// keys
982DECLARE_PDF_NAME(BitsPerComponent);
983DECLARE_PDF_NAME(ColorSpace);
984DECLARE_PDF_NAME(Decode);
985DECLARE_PDF_NAME(DecodeParms);
986DECLARE_PDF_NAME(Filter);
987DECLARE_PDF_NAME(Height);
988DECLARE_PDF_NAME(ImageMask);
989DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
990DECLARE_PDF_NAME(Interpolate);
991DECLARE_PDF_NAME(Width);
992
993// values
994DECLARE_PDF_NAME(DeviceGray);
995DECLARE_PDF_NAME(DeviceRGB);
996DECLARE_PDF_NAME(DeviceCMYK);
997DECLARE_PDF_NAME(Indexed);
998DECLARE_PDF_NAME(ASCIIHexDecode);
999DECLARE_PDF_NAME(ASCII85Decode);
1000DECLARE_PDF_NAME(LZWDecode);
1001DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1002DECLARE_PDF_NAME(RunLengthDecode);
1003DECLARE_PDF_NAME(CCITTFaxDecode);
1004DECLARE_PDF_NAME(DCTDecode);
1005
// If `obj` is the PDF name spelled `shortName`, returns the address of the
// object `longName` from the *enclosing function*; otherwise does nothing.
// Use it as a statement followed by a semicolon.
// The do/while(0) wrapper makes the expansion a single statement, so it is safe
// inside an unbraced if/else, and `obj` is parenthesised so that any pointer
// expression can be passed.
#define HANDLE_NAME_ABBR(obj,longName,shortName) \
    do { if ((obj)->isName(#shortName)) return &longName; } while (0)
1007
1008
1009static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
1010 if (!key || !key->isName()) {
1011 return key;
1012 }
1013
1014 // TODO(edisonn): use autogenerated code!
1015 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1016 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1017 HANDLE_NAME_ABBR(key, Decode, D);
1018 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1019 HANDLE_NAME_ABBR(key, Filter, F);
1020 HANDLE_NAME_ABBR(key, Height, H);
1021 HANDLE_NAME_ABBR(key, ImageMask, IM);
1022// HANDLE_NAME_ABBR(key, Intent, );
1023 HANDLE_NAME_ABBR(key, Interpolate, I);
1024 HANDLE_NAME_ABBR(key, Width, W);
1025
1026 return key;
1027}
1028
1029static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
1030 if (!value || !value->isName()) {
1031 return value;
1032 }
1033
1034 // TODO(edisonn): use autogenerated code!
1035 HANDLE_NAME_ABBR(value, DeviceGray, G);
1036 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1037 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1038 HANDLE_NAME_ABBR(value, Indexed, I);
1039 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1040 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1041 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1042 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1043 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1044 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1045 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1046
1047 return value;
1048}
1049
1050SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1051 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001052 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001053 if (fUncompressedStream >= fUncompressedStreamEnd) {
1054 return NULL;
1055 }
1056
1057 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
1058 SkPdfObject::makeEmptyDictionary(inlineImage);
1059
1060 while (fUncompressedStream < fUncompressedStreamEnd) {
1061 SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001062 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001063
1064 if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001065 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001066 return inlineImage;
1067 } else {
1068 SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001069 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001070 // TODO(edisonn): perf maybe we should not expand abreviation like this
1071 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1072 inlineImageValueAbbreviationExpand(obj));
1073 }
1074 }
1075 // TODO(edisonn): report end of data with inline image without an EI
1076 return inlineImage;
1077}