blob: 09b7a0b3a88fdaca17f01fafb06a3e46a1c8d72d [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com78b38b12013-07-15 18:20:58 +00007#include "SkPdfImageDictionary_autogen.h"
8
9// TODO(edisonn): perf!!!
10// there could be 0s between start and end! but not in the needle.
11static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
12 int needleLen = strlen(needle);
13 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
14 strncmp(hayStart, needle, needleLen) == 0) {
15 return hayStart;
16 }
17
18 hayStart++;
19
20 while (hayStart < hayEnd) {
21 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
22 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26 hayStart++;
27 }
28 return NULL;
29}
30
#ifdef PDF_TRACE
// Starts a new trace line: prints a running event id and the event type, followed by one
// space per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
    if (id == 478613) {
        // Convenient spot for a debugger breakpoint on a specific event id.
        printf("break;\n");
    }
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    int remaining = level;
    while (remaining > 0) {
        printf(" ");
        --remaining;
    }
}

// Prints one character consumed while skipping white space/comments.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Prints one character consumed as part of a token.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Prints the bytes of [start, end) followed by a new line.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints the bytes of [start, end) followed by a new line.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints the bytes of [start, end) followed by a new line.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

#else
// Tracing disabled: every trace call compiles to nothing.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
85
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000086const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000087 TRACE_INDENT(level, "White Space");
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000088 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000089 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000090 if (*start == kComment_PdfDelimiter) {
91 // skip the comment until end of line
92 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000093 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000094 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000095 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000096 }
97 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 start++;
100 }
101 }
102 return start;
103}
104
// TODO(edisonn): '(' can be used; will a delimiter or space inside () break the string?
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000106const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 //int opened brackets
108 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110
111 SkASSERT(!isPdfWhiteSpace(*start));
112
113 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000114 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115 start++;
116 return start;
117 }
118
119 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121 start++;
122 }
123 return start;
124}
125
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126// last elem has to be ]
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000127static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000128 if (allocator == NULL) {
129 // TODO(edisonn): report/warning error
130 return end;
131 }
132
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000133 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134 while (start < end) {
135 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000136 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000137
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000138 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139
140 if (endOfToken == start) {
141 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
142 return start;
143 }
144
145 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
146 return endOfToken;
147 }
148
149 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000150 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000151 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
152 // we are sure they are not references!
153 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
154 SkPdfObject* gen = array->removeLastInArray();
155 SkPdfObject* id = array->removeLastInArray();
156 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000157 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000158 }
159 array->appendInArray(newObj);
160 }
edisonn@google.com78b38b12013-07-15 18:20:58 +0000161 printf("break;\n"); // DO NOT SUBMIT!
edisonn@google.com571c70b2013-07-10 17:09:50 +0000162 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000163 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
164 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000165 return start;
166}
167
168// When we read strings we will rewrite the string so we will reuse the memory
169// when we start to read the string, we already consumed the opened bracket
edisonn@google.com571c70b2013-07-10 17:09:50 +0000170
// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer, or can reuse the one we have
172
173static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
174 TRACE_INDENT(level, "String");
175 const unsigned char* in = start;
176 bool hasOut = (out != NULL);
177
178 int openRoundBrackets = 1;
179 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000180 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
181 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000182 if (openRoundBrackets == 0) {
183 in++; // consumed )
184 break;
185 }
186
edisonn@google.com571c70b2013-07-10 17:09:50 +0000187 if (*in == kEscape_PdfSpecial) {
188 if (in + 1 < end) {
189 switch (in[1]) {
190 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000191 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000192 out++;
193 in += 2;
194 break;
195
196 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000197 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000198 out++;
199 in += 2;
200 break;
201
202 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000203 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000204 out++;
205 in += 2;
206 break;
207
208 case 'b':
209 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000210 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000211 out++;
212 in += 2;
213 break;
214
215 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000216 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000217 out++;
218 in += 2;
219 break;
220
221 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000222 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000223 out++;
224 in += 2;
225 break;
226
227 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000228 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000229 out++;
230 in += 2;
231 break;
232
233 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000234 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000235 out++;
236 in += 2;
237 break;
238
239 case '0':
240 case '1':
241 case '2':
242 case '3':
243 case '4':
244 case '5':
245 case '6':
246 case '7': {
247 //read octals
248 in++; // consume backslash
249
250 int code = 0;
251 int i = 0;
252 while (in < end && *in >= '0' && *in < '8') {
253 code = (code << 3) + ((*in) - '0'); // code * 8 + d
254 i++;
255 in++;
256 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000257 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000258 out++;
259 i = 0;
260 }
261 }
262 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000263 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000264 out++;
265 }
266 }
267 break;
268
269 default:
270 // Per spec, backslash is ignored is escaped ch is unknown
271 in++;
272 break;
273 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000274 } else {
275 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000276 }
277 } else {
278 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
279 // we could have one look that first just inc current, and when we find the backslash
280 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000281 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000282 in++;
283 out++;
284 }
285 }
286
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000287 if (hasOut) {
288 return in; // consumed already ) at the end of the string
289 } else {
290 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
291 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000292}
293
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000294static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
295 return readString(level, start, end, NULL) - start;
296}
297
298static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000299 if (!allocator) {
300 return end;
301 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000302 int outLength = readStringLength(level, start, end);
303 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
304 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
305 start = readString(level, start, end, out);
306 SkPdfObject::makeString(out, out + outLength, str);
307 TRACE_STRING(out, out + outLength);
308 return start; // consumed already ) at the end of the string
309}
310
311static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
312 TRACE_INDENT(level, "HexString");
313 bool hasOut = (out != NULL);
314 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000315
316 unsigned char code = 0;
317
318 while (in < end) {
319 while (in < end && isPdfWhiteSpace(*in)) {
320 in++;
321 }
322
323 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000324 //*in = '\0';
325 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000326 // normal exit
327 break;
328 }
329
330 if (in >= end) {
331 // end too soon
332 break;
333 }
334
335 switch (*in) {
336 case '0':
337 case '1':
338 case '2':
339 case '3':
340 case '4':
341 case '5':
342 case '6':
343 case '7':
344 case '8':
345 case '9':
346 code = (*in - '0') << 4;
347 break;
348
349 case 'a':
350 case 'b':
351 case 'c':
352 case 'd':
353 case 'e':
354 case 'f':
355 code = (*in - 'a' + 10) << 4;
356 break;
357
358 case 'A':
359 case 'B':
360 case 'C':
361 case 'D':
362 case 'E':
363 case 'F':
364 code = (*in - 'A' + 10) << 4;
365 break;
366
367 // TODO(edisonn): spec does not say how to handle this error
368 default:
369 break;
370 }
371
372 in++; // advance
373
374 while (in < end && isPdfWhiteSpace(*in)) {
375 in++;
376 }
377
378 // TODO(edisonn): report error
379 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000380 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000381 out++;
382 break;
383 }
384
385 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000386 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000387 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000388 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000389 break;
390 }
391
392 switch (*in) {
393 case '0':
394 case '1':
395 case '2':
396 case '3':
397 case '4':
398 case '5':
399 case '6':
400 case '7':
401 case '8':
402 case '9':
403 code += (*in - '0');
404 break;
405
406 case 'a':
407 case 'b':
408 case 'c':
409 case 'd':
410 case 'e':
411 case 'f':
412 code += (*in - 'a' + 10);
413 break;
414
415 case 'A':
416 case 'B':
417 case 'C':
418 case 'D':
419 case 'E':
420 case 'F':
421 code += (*in - 'A' + 10);
422 break;
423
424 // TODO(edisonn): spec does not say how to handle this error
425 default:
426 break;
427 }
428
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000429 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000430 out++;
431 in++;
432 }
433
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000434 if (hasOut) {
435 return in; // consumed already > at the end of the string
436 } else {
437 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000438 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000439}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000440
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000441static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
442 return readHexString(level, start, end, NULL) - start;
443}
444
445static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000446 if (!allocator) {
447 return end;
448 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000449 int outLength = readHexStringLength(level, start, end);
450 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
451 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
452 start = readHexString(level, start, end, out);
453 SkPdfObject::makeHexString(out, out + outLength, str);
454 TRACE_HEXSTRING(out, out + outLength);
455 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000456}
457
458// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000459static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
460 TRACE_INDENT(level, "Name");
461 bool hasOut = (out != NULL);
462 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000463
464 unsigned char code = 0;
465
466 while (in < end) {
467 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
468 break;
469 }
470
471 if (*in == '#' && in + 2 < end) {
472 in++;
473 switch (*in) {
474 case '0':
475 case '1':
476 case '2':
477 case '3':
478 case '4':
479 case '5':
480 case '6':
481 case '7':
482 case '8':
483 case '9':
484 code = (*in - '0') << 4;
485 break;
486
487 case 'a':
488 case 'b':
489 case 'c':
490 case 'd':
491 case 'e':
492 case 'f':
493 code = (*in - 'a' + 10) << 4;
494 break;
495
496 case 'A':
497 case 'B':
498 case 'C':
499 case 'D':
500 case 'E':
501 case 'F':
502 code = (*in - 'A' + 10) << 4;
503 break;
504
505 // TODO(edisonn): spec does not say how to handle this error
506 default:
507 break;
508 }
509
510 in++; // advance
511
512 switch (*in) {
513 case '0':
514 case '1':
515 case '2':
516 case '3':
517 case '4':
518 case '5':
519 case '6':
520 case '7':
521 case '8':
522 case '9':
523 code += (*in - '0');
524 break;
525
526 case 'a':
527 case 'b':
528 case 'c':
529 case 'd':
530 case 'e':
531 case 'f':
532 code += (*in - 'a' + 10);
533 break;
534
535 case 'A':
536 case 'B':
537 case 'C':
538 case 'D':
539 case 'E':
540 case 'F':
541 code += (*in - 'A' + 10);
542 break;
543
544 // TODO(edisonn): spec does not say how to handle this error
545 default:
546 break;
547 }
548
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000549 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000550 out++;
551 in++;
552 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000553 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000554 out++;
555 in++;
556 }
557 }
558
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000559 if (hasOut) {
560 return in;
561 } else {
562 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
563 }
564}
565
566static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
567 return readName(level, start, end, NULL) - start;
568}
569
570static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000571 if (!allocator) {
572 return end;
573 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000574 int outLength = readNameLength(level, start, end);
575 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
576 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
577 start = readName(level, start, end, out);
578 SkPdfObject::makeName(out, out + outLength, name);
579 TRACE_NAME(out, out + outLength);
580 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000581}
582
// TODO(edisonn): the PDF spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains endstream, together
// with a reference object with the length, but the real length object would be somewhere else.
// It could confuse the parser.
587/*example:
588
5897 0 obj
590<< /length 8 0 R>>
591stream
592...............
593endstream
5948 0 obj #we are in stream actually, not a real object
595<< 10 >> #we are in stream actually, not a real object
596endobj
597endstream
5988 0 obj #real obj
599<< 100 >> #real obj
600endobj
601and it could get worse, with multiple object like this
602*/
603
604// right now implement the silly algorithm that assumes endstream is finishing the stream
605
606
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000607static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
608 TRACE_INDENT(level, "Stream");
609 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000610 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
611 // no stream. return.
612 return start;
613 }
614
615 start += 6; // strlen("stream")
616 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
617 start += 2;
618 } else if (start[0] == kLF_PdfWhiteSpace) {
619 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000620 } else if (isPdfWhiteSpace(start[0])) {
621 start += 1;
622 } else {
623 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
624 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000625 }
626
627 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
628 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000629 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000630
631 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000632 if (stream->has_Length() && stream->Length(doc) > 0) {
633 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000634 }
635
636 // TODO(edisonn): laod external streams
637 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
638
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000639
640 if (length >= 0) {
641 const unsigned char* endstream = start + length;
642
643 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
644 endstream += 2;
645 } else if (endstream[0] == kLF_PdfWhiteSpace) {
646 endstream += 1;
647 }
648
649 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
650 length = -1;
651 }
652 }
653
edisonn@google.com571c70b2013-07-10 17:09:50 +0000654 if (length < 0) {
655 // scan the buffer, until we find first endstream
656 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000657 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000658
659 if (endstream) {
660 length = endstream - start;
661 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000662 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000663 }
664 }
665 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000666 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000667
668 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
669 endstream += 2;
670 } else if (endstream[0] == kLF_PdfWhiteSpace) {
671 endstream += 1;
672 }
673
674 // TODO(edisonn): verify the next bytes are "endstream"
675
676 endstream += strlen("endstream");
677 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000678 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000679 return endstream;
680 }
681 return start;
682}
683
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000684static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
685 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000686 // We already processed ID keyword, and we should be positioned immediately after it
687
688 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
689 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
690 start += 2;
691 } else if (start[0] == kLF_PdfWhiteSpace) {
692 start += 1;
693 } else if (isPdfWhiteSpace(start[0])) {
694 start += 1;
695 } else {
696 SkASSERT(isPdfDelimiter(start[0]));
697 // TODO(edisonn): warning?
698 }
699
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000700 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
701 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000702
703 if (endstream) {
704 int length = endstream - start;
705 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
706 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
707 inlineImage->addStream(start, (size_t)length);
708 } else {
709 // TODO(edisonn): report error in inline image stream (ID-EI) section
710 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
711 return end;
712 }
713 return endEI;
714}
715
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000716static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000717 if (allocator == NULL) {
718 // TODO(edisonn): report/warning error
719 return end;
720 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000721 TRACE_INDENT(level, "Dictionary");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000722 SkPdfObject::makeEmptyDictionary(dict);
723
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000724 start = skipPdfWhiteSpaces(level, start, end);
725 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000726
727 while (start < end && *start == kNamed_PdfDelimiter) {
728 SkPdfObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000729 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000730 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000731 start = readName(level + 1, start, end, &key, &tmpStorage);
732 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000733
734 if (start < end) {
735 SkPdfObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000736 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000737
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000738 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000739
740 if (start < end) {
741 // seems we have an indirect reference
742 if (isPdfDigit(*start)) {
743 SkPdfObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000744 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000745
746 SkPdfObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000747 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000748
749 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
750 int64_t id = value->intValue();
751 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000752 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000753 dict->set(&key, value);
754 } else {
755 // error, ignore
756 dict->set(&key, value);
757 }
758 } else {
759 // next elem is not a digit, but it might not be / either!
760 dict->set(&key, value);
761 }
762 } else {
763 // /key >>
764 dict->set(&key, value);
765 return end;
766 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000767 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000768 } else {
769 dict->set(&key, &SkPdfObject::kNull);
770 return end;
771 }
772 }
773
774 // TODO(edisonn): options to ignore these errors
775
776 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000777 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000778 if (*start != kClosedInequityBracket_PdfDelimiter) {
779 // TODO(edisonn): report/warning
780 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000781 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000782 start++; // skip >
783 if (*start != kClosedInequityBracket_PdfDelimiter) {
784 // TODO(edisonn): report/warning
785 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000786 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000787 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000788
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000789 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000790
791 return start;
792}
793
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000794const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
795 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000796
797 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000798 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000799
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000800 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000801
802 // no token, len would be 0
803 if (current == start) {
804 return NULL;
805 }
806
807 int tokenLen = current - start;
808
809 if (tokenLen == 1) {
810 // start array
811 switch (*start) {
812 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000813 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000814 SkPdfObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000815 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000816
817 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000818 //*start = '\0';
819 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000820
821 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000822 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000823 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000824 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000825 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000826 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000827 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000828 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000829 }
830
831 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000832 //*start = '\0';
833 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000834
835 // TODO(edisonn): what to do curly brackets? read spec!
836 case kOpenedCurlyBracket_PdfDelimiter:
837 default:
838 break;
839 }
840
841 SkASSERT(!isPdfWhiteSpace(*start));
842 if (isPdfDelimiter(*start)) {
843 // TODO(edisonn): how stream ] } > ) will be handled?
844 // for now ignore, and it will become a keyword to be ignored
845 }
846 }
847
848 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
849 SkPdfObject::makeNull(token);
850 return current;
851 }
852
853 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
854 SkPdfObject::makeBoolean(true, token);
855 return current;
856 }
857
858 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
859 SkPdfObject::makeBoolean(false, token);
860 return current;
861 }
862
863 if (isPdfNumeric(*start)) {
864 SkPdfObject::makeNumeric(start, current, token);
865 } else {
866 SkPdfObject::makeKeyword(start, current, token);
867 }
868 return current;
869}
870
871SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000872 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000873 return new SkPdfObject[BUFFER_SIZE];
874}
875
876SkPdfAllocator::~SkPdfAllocator() {
877 for (int i = 0 ; i < fHandles.count(); i++) {
878 free(fHandles[i]);
879 }
880 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000881 for (int j = 0 ; j < BUFFER_SIZE; j++) {
882 fHistory[i][j].reset();
883 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000884 delete[] fHistory[i];
885 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000886 for (int j = 0 ; j < BUFFER_SIZE; j++) {
887 fCurrent[j].reset();
888 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000889 delete[] fCurrent;
890}
891
892SkPdfObject* SkPdfAllocator::allocObject() {
893 if (fCurrentUsed >= BUFFER_SIZE) {
894 fHistory.push(fCurrent);
895 fCurrent = allocBlock();
896 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000897 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000898 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000899 fCurrentUsed++;
900 return &fCurrent[fCurrentUsed - 1];
901}
902
903// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000904SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000905 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000906 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000907 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000908 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000909 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000910 if (endobj) {
911 len = endobj - (char*)buffer + strlen("endobj");
912 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000913 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000914 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000915}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000916
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000917SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000918 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000919 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000920 if (endobj) {
921 len = endobj - (char*)buffer + strlen("endobj");
922 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000923 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000924 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000925}
926
927SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000928}
929
930bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
931 token->fKeyword = NULL;
932 token->fObject = NULL;
933
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000934 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000935 if (fUncompressedStream >= fUncompressedStreamEnd) {
936 return false;
937 }
938
939 SkPdfObject obj;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000940 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000941
942 // If it is a keyword, we will only get the pointer of the string
943 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
944 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000945 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000946 token->fType = kKeyword_TokenType;
947 } else {
948 SkPdfObject* pobj = fAllocator->allocObject();
949 *pobj = obj;
950 token->fObject = pobj;
951 token->fType = kObject_TokenType;
952 }
953
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000954#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000955 static int read_op = 0;
956 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000957 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000958 printf("break;\n");
959 }
960 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
961#endif
962
963 return true;
964}
965
966void SkPdfNativeTokenizer::PutBack(PdfToken token) {
967 SkASSERT(!fHasPutBack);
968 fHasPutBack = true;
969 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000970#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000971 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
972#endif
973}
974
975bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
976 if (fHasPutBack) {
977 *token = fPutBack;
978 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000979#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000980 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
981#endif
982 return true;
983 }
984
985 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000986#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000987 printf("EMPTY TOKENIZER\n");
988#endif
989 return false;
990 }
991
992 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000993}
edisonn@google.com78b38b12013-07-15 18:20:58 +0000994
// Defines a global SkPdfName variable called longName, passing the stringized identifier
// to its constructor.
#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)

// keys
DECLARE_PDF_NAME(BitsPerComponent);
DECLARE_PDF_NAME(ColorSpace);
DECLARE_PDF_NAME(Decode);
DECLARE_PDF_NAME(DecodeParms);
DECLARE_PDF_NAME(Filter);
DECLARE_PDF_NAME(Height);
DECLARE_PDF_NAME(ImageMask);
DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abbreviations?
DECLARE_PDF_NAME(Interpolate);
DECLARE_PDF_NAME(Width);

// values
DECLARE_PDF_NAME(DeviceGray);
DECLARE_PDF_NAME(DeviceRGB);
DECLARE_PDF_NAME(DeviceCMYK);
DECLARE_PDF_NAME(Indexed);
DECLARE_PDF_NAME(ASCIIHexDecode);
DECLARE_PDF_NAME(ASCII85Decode);
DECLARE_PDF_NAME(LZWDecode);
DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
DECLARE_PDF_NAME(RunLengthDecode);
DECLARE_PDF_NAME(CCITTFaxDecode);
DECLARE_PDF_NAME(DCTDecode);

// Makes the enclosing function return &longName when obj->isName("shortName") is true.
// Note that the expansion contains a return statement and ends with its own semicolon.
#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1023
1024
1025static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
1026 if (!key || !key->isName()) {
1027 return key;
1028 }
1029
1030 // TODO(edisonn): use autogenerated code!
1031 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1032 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1033 HANDLE_NAME_ABBR(key, Decode, D);
1034 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1035 HANDLE_NAME_ABBR(key, Filter, F);
1036 HANDLE_NAME_ABBR(key, Height, H);
1037 HANDLE_NAME_ABBR(key, ImageMask, IM);
1038// HANDLE_NAME_ABBR(key, Intent, );
1039 HANDLE_NAME_ABBR(key, Interpolate, I);
1040 HANDLE_NAME_ABBR(key, Width, W);
1041
1042 return key;
1043}
1044
1045static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
1046 if (!value || !value->isName()) {
1047 return value;
1048 }
1049
1050 // TODO(edisonn): use autogenerated code!
1051 HANDLE_NAME_ABBR(value, DeviceGray, G);
1052 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1053 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1054 HANDLE_NAME_ABBR(value, Indexed, I);
1055 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1056 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1057 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1058 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1059 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1060 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1061 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1062
1063 return value;
1064}
1065
1066SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1067 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001068 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001069 if (fUncompressedStream >= fUncompressedStreamEnd) {
1070 return NULL;
1071 }
1072
1073 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
1074 SkPdfObject::makeEmptyDictionary(inlineImage);
1075
1076 while (fUncompressedStream < fUncompressedStreamEnd) {
1077 SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001078 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001079
edisonn@google.come878e722013-07-29 19:10:58 +00001080 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001081 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001082 return inlineImage;
1083 } else {
1084 SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001085 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001086 // TODO(edisonn): perf maybe we should not expand abreviation like this
1087 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1088 inlineImageValueAbbreviationExpand(obj));
1089 }
1090 }
1091 // TODO(edisonn): report end of data with inline image without an EI
1092 return inlineImage;
1093}