blob: cc5788bfd796ba4ea93dc29b2d9c4fef92e52095 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com78b38b12013-07-15 18:20:58 +00007#include "SkPdfImageDictionary_autogen.h"
8
9// TODO(edisonn): perf!!!
10// there could be 0s between start and end! but not in the needle.
11static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
12 int needleLen = strlen(needle);
13 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
14 strncmp(hayStart, needle, needleLen) == 0) {
15 return hayStart;
16 }
17
18 hayStart++;
19
20 while (hayStart < hayEnd) {
21 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
22 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26 hayStart++;
27 }
28 return NULL;
29}
30
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000031#ifdef PDF_TRACE
// Starts a new trace line: prints a running sequence number and the element type,
// followed by one space per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int sequence = 0;
    ++sequence;
    // Debugging hook: a convenient place to put a breakpoint for one specific trace entry.
    if (478613 == sequence) {
        printf("break;\n");
    }
    printf("\n%10i %15s: ", sequence, type);
    int remaining = level;
    while (remaining > 0) {
        printf(" ");
        --remaining;
    }
}
edisonn@google.com3aac1f92013-07-02 22:42:53 +000044
// Echoes one byte of skipped white space/comment to the trace output.
static void TRACE_COMMENT(char ch) {
    const char skipped = ch;
    printf("%c", skipped);
}
48
// Echoes one byte of the current token to the trace output.
static void TRACE_TK(char ch) {
    const char tokenByte = ch;
    printf("%c", tokenByte);
}
52
// Prints the bytes of a decoded name, [start, end), followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
60
// Prints the bytes of a decoded literal string, [start, end), followed by a newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
68
// Prints the bytes of a decoded hex string, [start, end), followed by a newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
76
77#else
78#define TRACE_INDENT(level,type)
79#define TRACE_COMMENT(ch)
80#define TRACE_TK(ch)
81#define TRACE_NAME(start,end)
82#define TRACE_STRING(start,end)
83#define TRACE_HEXSTRING(start,end)
84#endif
85
86static const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
87 TRACE_INDENT(level, "White Space");
edisonn@google.com571c70b2013-07-10 17:09:50 +000088 while (start < end && isPdfWhiteSpace(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000089 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000090 if (*start == kComment_PdfDelimiter) {
91 // skip the comment until end of line
92 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000093 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000094 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000095 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000096 }
97 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 start++;
100 }
101 }
102 return start;
103}
104
105// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000106static const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 //int opened brackets
108 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110
111 SkASSERT(!isPdfWhiteSpace(*start));
112
113 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000114 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115 start++;
116 return start;
117 }
118
119 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121 start++;
122 }
123 return start;
124}
125
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126// last elem has to be ]
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000127static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
128 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000129 while (start < end) {
130 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000131 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000133 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134
135 if (endOfToken == start) {
136 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
137 return start;
138 }
139
140 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
141 return endOfToken;
142 }
143
144 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000145 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000146 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
147 // we are sure they are not references!
148 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
149 SkPdfObject* gen = array->removeLastInArray();
150 SkPdfObject* id = array->removeLastInArray();
151 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000152 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000153 }
154 array->appendInArray(newObj);
155 }
edisonn@google.com78b38b12013-07-15 18:20:58 +0000156 printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com571c70b2013-07-10 17:09:50 +0000157 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000158 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
159 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000160 return start;
161}
162
// When we read a string we decode it into an output buffer, so that the memory can be reused.
// By the time we start reading the string, the opening bracket has already been consumed.

// TODO(edisonn): space: add a parameter that reports whether we need to allocate a new buffer, or can reuse the one we have
167
168static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
169 TRACE_INDENT(level, "String");
170 const unsigned char* in = start;
171 bool hasOut = (out != NULL);
172
173 int openRoundBrackets = 1;
174 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000175 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
176 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000177 if (openRoundBrackets == 0) {
178 in++; // consumed )
179 break;
180 }
181
edisonn@google.com571c70b2013-07-10 17:09:50 +0000182 if (*in == kEscape_PdfSpecial) {
183 if (in + 1 < end) {
184 switch (in[1]) {
185 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000186 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000187 out++;
188 in += 2;
189 break;
190
191 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000192 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000193 out++;
194 in += 2;
195 break;
196
197 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000198 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000199 out++;
200 in += 2;
201 break;
202
203 case 'b':
204 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000205 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000206 out++;
207 in += 2;
208 break;
209
210 case 'f':
211 *out = kFF_PdfWhiteSpace;
212 out++;
213 in += 2;
214 break;
215
216 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000217 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000218 out++;
219 in += 2;
220 break;
221
222 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000223 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000224 out++;
225 in += 2;
226 break;
227
228 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000229 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000230 out++;
231 in += 2;
232 break;
233
234 case '0':
235 case '1':
236 case '2':
237 case '3':
238 case '4':
239 case '5':
240 case '6':
241 case '7': {
242 //read octals
243 in++; // consume backslash
244
245 int code = 0;
246 int i = 0;
247 while (in < end && *in >= '0' && *in < '8') {
248 code = (code << 3) + ((*in) - '0'); // code * 8 + d
249 i++;
250 in++;
251 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000252 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000253 out++;
254 i = 0;
255 }
256 }
257 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000258 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000259 out++;
260 }
261 }
262 break;
263
264 default:
265 // Per spec, backslash is ignored is escaped ch is unknown
266 in++;
267 break;
268 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000269 } else {
270 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000271 }
272 } else {
273 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
274 // we could have one look that first just inc current, and when we find the backslash
275 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000276 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000277 in++;
278 out++;
279 }
280 }
281
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000282 if (hasOut) {
283 return in; // consumed already ) at the end of the string
284 } else {
285 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
286 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000287}
288
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000289static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
290 return readString(level, start, end, NULL) - start;
291}
292
293static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000294 if (!allocator) {
295 return end;
296 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000297 int outLength = readStringLength(level, start, end);
298 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
299 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
300 start = readString(level, start, end, out);
301 SkPdfObject::makeString(out, out + outLength, str);
302 TRACE_STRING(out, out + outLength);
303 return start; // consumed already ) at the end of the string
304}
305
306static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
307 TRACE_INDENT(level, "HexString");
308 bool hasOut = (out != NULL);
309 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000310
311 unsigned char code = 0;
312
313 while (in < end) {
314 while (in < end && isPdfWhiteSpace(*in)) {
315 in++;
316 }
317
318 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000319 //*in = '\0';
320 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000321 // normal exit
322 break;
323 }
324
325 if (in >= end) {
326 // end too soon
327 break;
328 }
329
330 switch (*in) {
331 case '0':
332 case '1':
333 case '2':
334 case '3':
335 case '4':
336 case '5':
337 case '6':
338 case '7':
339 case '8':
340 case '9':
341 code = (*in - '0') << 4;
342 break;
343
344 case 'a':
345 case 'b':
346 case 'c':
347 case 'd':
348 case 'e':
349 case 'f':
350 code = (*in - 'a' + 10) << 4;
351 break;
352
353 case 'A':
354 case 'B':
355 case 'C':
356 case 'D':
357 case 'E':
358 case 'F':
359 code = (*in - 'A' + 10) << 4;
360 break;
361
362 // TODO(edisonn): spec does not say how to handle this error
363 default:
364 break;
365 }
366
367 in++; // advance
368
369 while (in < end && isPdfWhiteSpace(*in)) {
370 in++;
371 }
372
373 // TODO(edisonn): report error
374 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000375 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000376 out++;
377 break;
378 }
379
380 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000381 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000382 out++;
383 break;
384 }
385
386 switch (*in) {
387 case '0':
388 case '1':
389 case '2':
390 case '3':
391 case '4':
392 case '5':
393 case '6':
394 case '7':
395 case '8':
396 case '9':
397 code += (*in - '0');
398 break;
399
400 case 'a':
401 case 'b':
402 case 'c':
403 case 'd':
404 case 'e':
405 case 'f':
406 code += (*in - 'a' + 10);
407 break;
408
409 case 'A':
410 case 'B':
411 case 'C':
412 case 'D':
413 case 'E':
414 case 'F':
415 code += (*in - 'A' + 10);
416 break;
417
418 // TODO(edisonn): spec does not say how to handle this error
419 default:
420 break;
421 }
422
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000423 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000424 out++;
425 in++;
426 }
427
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000428 if (hasOut) {
429 return in; // consumed already > at the end of the string
430 } else {
431 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000432 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000433}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000434
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000435static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
436 return readHexString(level, start, end, NULL) - start;
437}
438
439static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000440 if (!allocator) {
441 return end;
442 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000443 int outLength = readHexStringLength(level, start, end);
444 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
445 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
446 start = readHexString(level, start, end, out);
447 SkPdfObject::makeHexString(out, out + outLength, str);
448 TRACE_HEXSTRING(out, out + outLength);
449 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000450}
451
452// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000453static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
454 TRACE_INDENT(level, "Name");
455 bool hasOut = (out != NULL);
456 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000457
458 unsigned char code = 0;
459
460 while (in < end) {
461 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
462 break;
463 }
464
465 if (*in == '#' && in + 2 < end) {
466 in++;
467 switch (*in) {
468 case '0':
469 case '1':
470 case '2':
471 case '3':
472 case '4':
473 case '5':
474 case '6':
475 case '7':
476 case '8':
477 case '9':
478 code = (*in - '0') << 4;
479 break;
480
481 case 'a':
482 case 'b':
483 case 'c':
484 case 'd':
485 case 'e':
486 case 'f':
487 code = (*in - 'a' + 10) << 4;
488 break;
489
490 case 'A':
491 case 'B':
492 case 'C':
493 case 'D':
494 case 'E':
495 case 'F':
496 code = (*in - 'A' + 10) << 4;
497 break;
498
499 // TODO(edisonn): spec does not say how to handle this error
500 default:
501 break;
502 }
503
504 in++; // advance
505
506 switch (*in) {
507 case '0':
508 case '1':
509 case '2':
510 case '3':
511 case '4':
512 case '5':
513 case '6':
514 case '7':
515 case '8':
516 case '9':
517 code += (*in - '0');
518 break;
519
520 case 'a':
521 case 'b':
522 case 'c':
523 case 'd':
524 case 'e':
525 case 'f':
526 code += (*in - 'a' + 10);
527 break;
528
529 case 'A':
530 case 'B':
531 case 'C':
532 case 'D':
533 case 'E':
534 case 'F':
535 code += (*in - 'A' + 10);
536 break;
537
538 // TODO(edisonn): spec does not say how to handle this error
539 default:
540 break;
541 }
542
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000543 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000544 out++;
545 in++;
546 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000547 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000548 out++;
549 in++;
550 }
551 }
552
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000553 if (hasOut) {
554 return in;
555 } else {
556 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
557 }
558}
559
560static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
561 return readName(level, start, end, NULL) - start;
562}
563
564static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000565 if (!allocator) {
566 return end;
567 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000568 int outLength = readNameLength(level, start, end);
569 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
570 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
571 start = readName(level, start, end, out);
572 SkPdfObject::makeName(out, out + outLength, name);
573 TRACE_NAME(out, out + outLength);
574 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000575}
576
// TODO(edisonn): the PDF spec lets Length be an indirect object that is defined after the stream.
// That makes for an interesting scenario, where the stream data itself contains "endstream" together
// with an object that looks like the referenced length, while the real length object is somewhere else.
// This could confuse the parser.
581/*example:
582
5837 0 obj
584<< /length 8 0 R>>
585stream
586...............
587endstream
5888 0 obj #we are in stream actually, not a real object
589<< 10 >> #we are in stream actually, not a real object
590endobj
591endstream
5928 0 obj #real obj
593<< 100 >> #real obj
594endobj
595and it could get worse, with multiple object like this
596*/
597
598// right now implement the silly algorithm that assumes endstream is finishing the stream
599
600
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000601static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
602 TRACE_INDENT(level, "Stream");
603 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000604 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
605 // no stream. return.
606 return start;
607 }
608
609 start += 6; // strlen("stream")
610 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
611 start += 2;
612 } else if (start[0] == kLF_PdfWhiteSpace) {
613 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000614 } else if (isPdfWhiteSpace(start[0])) {
615 start += 1;
616 } else {
617 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
618 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000619 }
620
621 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
622 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000623 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000624
625 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000626 if (stream->has_Length() && stream->Length(doc) > 0) {
627 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000628 }
629
630 // TODO(edisonn): laod external streams
631 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
632
633 if (length < 0) {
634 // scan the buffer, until we find first endstream
635 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000636 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000637
638 if (endstream) {
639 length = endstream - start;
640 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000641 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000642 }
643 }
644 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000645 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000646
647 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
648 endstream += 2;
649 } else if (endstream[0] == kLF_PdfWhiteSpace) {
650 endstream += 1;
651 }
652
653 // TODO(edisonn): verify the next bytes are "endstream"
654
655 endstream += strlen("endstream");
656 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000657 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000658 return endstream;
659 }
660 return start;
661}
662
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000663static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
664 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000665 // We already processed ID keyword, and we should be positioned immediately after it
666
667 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
668 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
669 start += 2;
670 } else if (start[0] == kLF_PdfWhiteSpace) {
671 start += 1;
672 } else if (isPdfWhiteSpace(start[0])) {
673 start += 1;
674 } else {
675 SkASSERT(isPdfDelimiter(start[0]));
676 // TODO(edisonn): warning?
677 }
678
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000679 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
680 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000681
682 if (endstream) {
683 int length = endstream - start;
684 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
685 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
686 inlineImage->addStream(start, (size_t)length);
687 } else {
688 // TODO(edisonn): report error in inline image stream (ID-EI) section
689 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
690 return end;
691 }
692 return endEI;
693}
694
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000695static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
696 TRACE_INDENT(level, "Dictionary");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000697 SkPdfObject::makeEmptyDictionary(dict);
698
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000699 start = skipPdfWhiteSpaces(level, start, end);
700 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000701
702 while (start < end && *start == kNamed_PdfDelimiter) {
703 SkPdfObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000704 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000705 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000706 start = readName(level + 1, start, end, &key, &tmpStorage);
707 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000708
709 if (start < end) {
710 SkPdfObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000711 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000712
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000713 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000714
715 if (start < end) {
716 // seems we have an indirect reference
717 if (isPdfDigit(*start)) {
718 SkPdfObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000719 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000720
721 SkPdfObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000722 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000723
724 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
725 int64_t id = value->intValue();
726 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000727 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000728 dict->set(&key, value);
729 } else {
730 // error, ignore
731 dict->set(&key, value);
732 }
733 } else {
734 // next elem is not a digit, but it might not be / either!
735 dict->set(&key, value);
736 }
737 } else {
738 // /key >>
739 dict->set(&key, value);
740 return end;
741 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000742 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743 } else {
744 dict->set(&key, &SkPdfObject::kNull);
745 return end;
746 }
747 }
748
749 // TODO(edisonn): options to ignore these errors
750
751 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000752 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000753 if (*start != kClosedInequityBracket_PdfDelimiter) {
754 // TODO(edisonn): report/warning
755 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000756 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000757 start++; // skip >
758 if (*start != kClosedInequityBracket_PdfDelimiter) {
759 // TODO(edisonn): report/warning
760 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000761 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000762 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000763
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000764 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000765
766 return start;
767}
768
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000769const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
770 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000771
772 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000773 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000774
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000775 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000776
777 // no token, len would be 0
778 if (current == start) {
779 return NULL;
780 }
781
782 int tokenLen = current - start;
783
784 if (tokenLen == 1) {
785 // start array
786 switch (*start) {
787 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000788 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000789 SkPdfObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000790 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000791
792 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000793 //*start = '\0';
794 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000795
796 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000797 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000798 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000799 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000801 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000802 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000803 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000804 }
805
806 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000807 //*start = '\0';
808 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000809
810 // TODO(edisonn): what to do curly brackets? read spec!
811 case kOpenedCurlyBracket_PdfDelimiter:
812 default:
813 break;
814 }
815
816 SkASSERT(!isPdfWhiteSpace(*start));
817 if (isPdfDelimiter(*start)) {
818 // TODO(edisonn): how stream ] } > ) will be handled?
819 // for now ignore, and it will become a keyword to be ignored
820 }
821 }
822
823 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
824 SkPdfObject::makeNull(token);
825 return current;
826 }
827
828 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
829 SkPdfObject::makeBoolean(true, token);
830 return current;
831 }
832
833 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
834 SkPdfObject::makeBoolean(false, token);
835 return current;
836 }
837
838 if (isPdfNumeric(*start)) {
839 SkPdfObject::makeNumeric(start, current, token);
840 } else {
841 SkPdfObject::makeKeyword(start, current, token);
842 }
843 return current;
844}
845
846SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000847 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000848 return new SkPdfObject[BUFFER_SIZE];
849}
850
851SkPdfAllocator::~SkPdfAllocator() {
852 for (int i = 0 ; i < fHandles.count(); i++) {
853 free(fHandles[i]);
854 }
855 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000856 for (int j = 0 ; j < BUFFER_SIZE; j++) {
857 fHistory[i][j].reset();
858 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000859 delete[] fHistory[i];
860 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000861 for (int j = 0 ; j < BUFFER_SIZE; j++) {
862 fCurrent[j].reset();
863 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000864 delete[] fCurrent;
865}
866
867SkPdfObject* SkPdfAllocator::allocObject() {
868 if (fCurrentUsed >= BUFFER_SIZE) {
869 fHistory.push(fCurrent);
870 fCurrent = allocBlock();
871 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000872 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000873 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000874 fCurrentUsed++;
875 return &fCurrent[fCurrentUsed - 1];
876}
877
878// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000879SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000880 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000881 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000882 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000883 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000884 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000885 if (endobj) {
886 len = endobj - (char*)buffer + strlen("endobj");
887 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000888 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000889 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000890}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000891
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000892SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000893 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000894 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000895 if (endobj) {
896 len = endobj - (char*)buffer + strlen("endobj");
897 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000898 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000899 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000900}
901
902SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000903}
904
905bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
906 token->fKeyword = NULL;
907 token->fObject = NULL;
908
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000909 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000910 if (fUncompressedStream >= fUncompressedStreamEnd) {
911 return false;
912 }
913
914 SkPdfObject obj;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000915 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000916
917 // If it is a keyword, we will only get the pointer of the string
918 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
919 token->fKeyword = obj.c_str();
920 token->fKeywordLength = obj.len();
921 token->fType = kKeyword_TokenType;
922 } else {
923 SkPdfObject* pobj = fAllocator->allocObject();
924 *pobj = obj;
925 token->fObject = pobj;
926 token->fType = kObject_TokenType;
927 }
928
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000929#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000930 static int read_op = 0;
931 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000932 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000933 printf("break;\n");
934 }
935 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
936#endif
937
938 return true;
939}
940
941void SkPdfNativeTokenizer::PutBack(PdfToken token) {
942 SkASSERT(!fHasPutBack);
943 fHasPutBack = true;
944 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000945#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000946 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
947#endif
948}
949
950bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
951 if (fHasPutBack) {
952 *token = fPutBack;
953 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000954#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000955 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
956#endif
957 return true;
958 }
959
960 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000961#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000962 printf("EMPTY TOKENIZER\n");
963#endif
964 return false;
965 }
966
967 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000968}
edisonn@google.com78b38b12013-07-15 18:20:58 +0000969
970#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
971
972// keys
973DECLARE_PDF_NAME(BitsPerComponent);
974DECLARE_PDF_NAME(ColorSpace);
975DECLARE_PDF_NAME(Decode);
976DECLARE_PDF_NAME(DecodeParms);
977DECLARE_PDF_NAME(Filter);
978DECLARE_PDF_NAME(Height);
979DECLARE_PDF_NAME(ImageMask);
980DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
981DECLARE_PDF_NAME(Interpolate);
982DECLARE_PDF_NAME(Width);
983
984// values
985DECLARE_PDF_NAME(DeviceGray);
986DECLARE_PDF_NAME(DeviceRGB);
987DECLARE_PDF_NAME(DeviceCMYK);
988DECLARE_PDF_NAME(Indexed);
989DECLARE_PDF_NAME(ASCIIHexDecode);
990DECLARE_PDF_NAME(ASCII85Decode);
991DECLARE_PDF_NAME(LZWDecode);
992DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
993DECLARE_PDF_NAME(RunLengthDecode);
994DECLARE_PDF_NAME(CCITTFaxDecode);
995DECLARE_PDF_NAME(DCTDecode);
996
997#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
998
999
1000static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
1001 if (!key || !key->isName()) {
1002 return key;
1003 }
1004
1005 // TODO(edisonn): use autogenerated code!
1006 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1007 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1008 HANDLE_NAME_ABBR(key, Decode, D);
1009 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1010 HANDLE_NAME_ABBR(key, Filter, F);
1011 HANDLE_NAME_ABBR(key, Height, H);
1012 HANDLE_NAME_ABBR(key, ImageMask, IM);
1013// HANDLE_NAME_ABBR(key, Intent, );
1014 HANDLE_NAME_ABBR(key, Interpolate, I);
1015 HANDLE_NAME_ABBR(key, Width, W);
1016
1017 return key;
1018}
1019
1020static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
1021 if (!value || !value->isName()) {
1022 return value;
1023 }
1024
1025 // TODO(edisonn): use autogenerated code!
1026 HANDLE_NAME_ABBR(value, DeviceGray, G);
1027 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1028 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1029 HANDLE_NAME_ABBR(value, Indexed, I);
1030 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1031 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1032 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1033 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1034 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1035 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1036 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1037
1038 return value;
1039}
1040
1041SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1042 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001043 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001044 if (fUncompressedStream >= fUncompressedStreamEnd) {
1045 return NULL;
1046 }
1047
1048 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
1049 SkPdfObject::makeEmptyDictionary(inlineImage);
1050
1051 while (fUncompressedStream < fUncompressedStreamEnd) {
1052 SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001053 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001054
1055 if (key->isKeyword() && key->len() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001056 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001057 return inlineImage;
1058 } else {
1059 SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001060 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001061 // TODO(edisonn): perf maybe we should not expand abreviation like this
1062 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1063 inlineImageValueAbbreviationExpand(obj));
1064 }
1065 }
1066 // TODO(edisonn): report end of data with inline image without an EI
1067 return inlineImage;
1068}