blob: a8881382b4acf58e6e03ae59019d3b00f3e37a28 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com78b38b12013-07-15 18:20:58 +00007#include "SkPdfImageDictionary_autogen.h"
8
9// TODO(edisonn): perf!!!
10// there could be 0s between start and end! but not in the needle.
11static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
12 int needleLen = strlen(needle);
13 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
14 strncmp(hayStart, needle, needleLen) == 0) {
15 return hayStart;
16 }
17
18 hayStart++;
19
20 while (hayStart < hayEnd) {
21 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
22 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26 hayStart++;
27 }
28 return NULL;
29}
30
#ifdef PDF_TRACE_TOKENIZER
// Tokenizer tracing helpers. They print to stdout when PDF_TRACE_TOKENIZER is defined and
// compile to nothing otherwise.

// Starts a new trace line: a running event counter, the token type, then one space per level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;
    ++id;
#if 0
    // Handy spot for a breakpoint on a specific trace event.
    if (478613 == id) {
        printf("break;\n");
    }
#endif
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    for (int remaining = level; remaining > 0; --remaining) {
        printf(" ");
    }
}

// Echoes one character that belongs to white space or to a comment.
static void TRACE_COMMENT(char ch) {
    printf("%c", ch);
}

// Echoes one character that belongs to a token.
static void TRACE_TK(char ch) {
    printf("%c", ch);
}

// Prints the bytes of a decoded name, [start, end), followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints the bytes of a decoded literal string, [start, end), followed by a newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

// Prints the bytes of a decoded hex string, [start, end), followed by a newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}

#else
// Tracing disabled: every helper expands to nothing.
#define TRACE_INDENT(level,type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
87
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000088const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000089 TRACE_INDENT(level, "White Space");
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000090 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000091 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000092 if (*start == kComment_PdfDelimiter) {
93 // skip the comment until end of line
94 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000095 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000096 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000097 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000098 }
99 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000100 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000101 start++;
102 }
103 }
104 return start;
105}
106
107// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000108const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000109 //int opened brackets
110 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000111 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000112
113 SkASSERT(!isPdfWhiteSpace(*start));
114
115 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000116 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000117 start++;
118 return start;
119 }
120
121 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000122 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000123 start++;
124 }
125 return start;
126}
127
edisonn@google.com571c70b2013-07-10 17:09:50 +0000128// last elem has to be ]
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000129static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000130 if (allocator == NULL) {
131 // TODO(edisonn): report/warning error
132 return end;
133 }
134
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000135 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000136 while (start < end) {
137 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000138 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000140 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000141
142 if (endOfToken == start) {
143 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
144 return start;
145 }
146
147 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
148 return endOfToken;
149 }
150
151 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000152 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000153 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
154 // we are sure they are not references!
155 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
156 SkPdfObject* gen = array->removeLastInArray();
157 SkPdfObject* id = array->removeLastInArray();
158 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000159 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000160 }
161 array->appendInArray(newObj);
162 }
163 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000164 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
165 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000166 return start;
167}
168
169// When we read strings we will rewrite the string so we will reuse the memory
170// when we start to read the string, we already consumed the opened bracket
edisonn@google.com571c70b2013-07-10 17:09:50 +0000171
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000172// TODO(edisonn): space: add paramater, taht would report if we need to allocate new buffer, or we can reuse the one we have
173
174static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
175 TRACE_INDENT(level, "String");
176 const unsigned char* in = start;
177 bool hasOut = (out != NULL);
178
179 int openRoundBrackets = 1;
180 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000181 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
182 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000183 if (openRoundBrackets == 0) {
184 in++; // consumed )
185 break;
186 }
187
edisonn@google.com571c70b2013-07-10 17:09:50 +0000188 if (*in == kEscape_PdfSpecial) {
189 if (in + 1 < end) {
190 switch (in[1]) {
191 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000192 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000193 out++;
194 in += 2;
195 break;
196
197 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000198 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000199 out++;
200 in += 2;
201 break;
202
203 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000204 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000205 out++;
206 in += 2;
207 break;
208
209 case 'b':
210 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000211 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000212 out++;
213 in += 2;
214 break;
215
216 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000217 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000218 out++;
219 in += 2;
220 break;
221
222 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000223 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000224 out++;
225 in += 2;
226 break;
227
228 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000229 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000230 out++;
231 in += 2;
232 break;
233
234 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000235 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000236 out++;
237 in += 2;
238 break;
239
240 case '0':
241 case '1':
242 case '2':
243 case '3':
244 case '4':
245 case '5':
246 case '6':
247 case '7': {
248 //read octals
249 in++; // consume backslash
250
251 int code = 0;
252 int i = 0;
253 while (in < end && *in >= '0' && *in < '8') {
254 code = (code << 3) + ((*in) - '0'); // code * 8 + d
255 i++;
256 in++;
257 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000258 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000259 out++;
260 i = 0;
261 }
262 }
263 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000264 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000265 out++;
266 }
267 }
268 break;
269
270 default:
271 // Per spec, backslash is ignored is escaped ch is unknown
272 in++;
273 break;
274 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000275 } else {
276 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000277 }
278 } else {
279 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
280 // we could have one look that first just inc current, and when we find the backslash
281 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000282 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000283 in++;
284 out++;
285 }
286 }
287
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000288 if (hasOut) {
289 return in; // consumed already ) at the end of the string
290 } else {
291 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
292 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000293}
294
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000295static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
296 return readString(level, start, end, NULL) - start;
297}
298
299static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000300 if (!allocator) {
301 return end;
302 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000303 int outLength = readStringLength(level, start, end);
304 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
305 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
306 start = readString(level, start, end, out);
307 SkPdfObject::makeString(out, out + outLength, str);
308 TRACE_STRING(out, out + outLength);
309 return start; // consumed already ) at the end of the string
310}
311
312static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
313 TRACE_INDENT(level, "HexString");
314 bool hasOut = (out != NULL);
315 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000316
317 unsigned char code = 0;
318
319 while (in < end) {
320 while (in < end && isPdfWhiteSpace(*in)) {
321 in++;
322 }
323
324 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000325 //*in = '\0';
326 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000327 // normal exit
328 break;
329 }
330
331 if (in >= end) {
332 // end too soon
333 break;
334 }
335
336 switch (*in) {
337 case '0':
338 case '1':
339 case '2':
340 case '3':
341 case '4':
342 case '5':
343 case '6':
344 case '7':
345 case '8':
346 case '9':
347 code = (*in - '0') << 4;
348 break;
349
350 case 'a':
351 case 'b':
352 case 'c':
353 case 'd':
354 case 'e':
355 case 'f':
356 code = (*in - 'a' + 10) << 4;
357 break;
358
359 case 'A':
360 case 'B':
361 case 'C':
362 case 'D':
363 case 'E':
364 case 'F':
365 code = (*in - 'A' + 10) << 4;
366 break;
367
368 // TODO(edisonn): spec does not say how to handle this error
369 default:
370 break;
371 }
372
373 in++; // advance
374
375 while (in < end && isPdfWhiteSpace(*in)) {
376 in++;
377 }
378
379 // TODO(edisonn): report error
380 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000381 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000382 out++;
383 break;
384 }
385
386 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000387 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000388 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000389 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000390 break;
391 }
392
393 switch (*in) {
394 case '0':
395 case '1':
396 case '2':
397 case '3':
398 case '4':
399 case '5':
400 case '6':
401 case '7':
402 case '8':
403 case '9':
404 code += (*in - '0');
405 break;
406
407 case 'a':
408 case 'b':
409 case 'c':
410 case 'd':
411 case 'e':
412 case 'f':
413 code += (*in - 'a' + 10);
414 break;
415
416 case 'A':
417 case 'B':
418 case 'C':
419 case 'D':
420 case 'E':
421 case 'F':
422 code += (*in - 'A' + 10);
423 break;
424
425 // TODO(edisonn): spec does not say how to handle this error
426 default:
427 break;
428 }
429
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000430 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000431 out++;
432 in++;
433 }
434
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000435 if (hasOut) {
436 return in; // consumed already > at the end of the string
437 } else {
438 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000439 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000440}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000441
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000442static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
443 return readHexString(level, start, end, NULL) - start;
444}
445
446static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000447 if (!allocator) {
448 return end;
449 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000450 int outLength = readHexStringLength(level, start, end);
451 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
452 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
453 start = readHexString(level, start, end, out);
454 SkPdfObject::makeHexString(out, out + outLength, str);
455 TRACE_HEXSTRING(out, out + outLength);
456 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000457}
458
459// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000460static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
461 TRACE_INDENT(level, "Name");
462 bool hasOut = (out != NULL);
463 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000464
465 unsigned char code = 0;
466
467 while (in < end) {
468 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
469 break;
470 }
471
472 if (*in == '#' && in + 2 < end) {
473 in++;
474 switch (*in) {
475 case '0':
476 case '1':
477 case '2':
478 case '3':
479 case '4':
480 case '5':
481 case '6':
482 case '7':
483 case '8':
484 case '9':
485 code = (*in - '0') << 4;
486 break;
487
488 case 'a':
489 case 'b':
490 case 'c':
491 case 'd':
492 case 'e':
493 case 'f':
494 code = (*in - 'a' + 10) << 4;
495 break;
496
497 case 'A':
498 case 'B':
499 case 'C':
500 case 'D':
501 case 'E':
502 case 'F':
503 code = (*in - 'A' + 10) << 4;
504 break;
505
506 // TODO(edisonn): spec does not say how to handle this error
507 default:
508 break;
509 }
510
511 in++; // advance
512
513 switch (*in) {
514 case '0':
515 case '1':
516 case '2':
517 case '3':
518 case '4':
519 case '5':
520 case '6':
521 case '7':
522 case '8':
523 case '9':
524 code += (*in - '0');
525 break;
526
527 case 'a':
528 case 'b':
529 case 'c':
530 case 'd':
531 case 'e':
532 case 'f':
533 code += (*in - 'a' + 10);
534 break;
535
536 case 'A':
537 case 'B':
538 case 'C':
539 case 'D':
540 case 'E':
541 case 'F':
542 code += (*in - 'A' + 10);
543 break;
544
545 // TODO(edisonn): spec does not say how to handle this error
546 default:
547 break;
548 }
549
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000550 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000551 out++;
552 in++;
553 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000554 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000555 out++;
556 in++;
557 }
558 }
559
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000560 if (hasOut) {
561 return in;
562 } else {
563 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
564 }
565}
566
567static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
568 return readName(level, start, end, NULL) - start;
569}
570
571static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000572 if (!allocator) {
573 return end;
574 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000575 int outLength = readNameLength(level, start, end);
576 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
577 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
578 start = readName(level, start, end, out);
579 SkPdfObject::makeName(out, out + outLength, name);
580 TRACE_NAME(out, out + outLength);
581 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000582}
583
// TODO(edisonn): the PDF spec lets Length be an indirect object that is defined after the stream.
// That makes for an interesting scenario, where the stream data itself contains "endstream",
// together with an object that looks like the referenced length, while the real length object
// lives somewhere else. This could confuse the parser.
588/*example:
589
5907 0 obj
591<< /length 8 0 R>>
592stream
593...............
594endstream
5958 0 obj #we are in stream actually, not a real object
596<< 10 >> #we are in stream actually, not a real object
597endobj
598endstream
5998 0 obj #real obj
600<< 100 >> #real obj
601endobj
602and it could get worse, with multiple object like this
603*/
604
605// right now implement the silly algorithm that assumes endstream is finishing the stream
606
607
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000608static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
609 TRACE_INDENT(level, "Stream");
610 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000611 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
612 // no stream. return.
613 return start;
614 }
615
616 start += 6; // strlen("stream")
617 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
618 start += 2;
619 } else if (start[0] == kLF_PdfWhiteSpace) {
620 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000621 } else if (isPdfWhiteSpace(start[0])) {
622 start += 1;
623 } else {
624 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
625 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000626 }
627
628 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
629 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000630 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000631
632 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000633 if (stream->has_Length() && stream->Length(doc) > 0) {
634 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000635 }
636
637 // TODO(edisonn): laod external streams
638 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
639
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000640
641 if (length >= 0) {
642 const unsigned char* endstream = start + length;
643
644 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
645 endstream += 2;
646 } else if (endstream[0] == kLF_PdfWhiteSpace) {
647 endstream += 1;
648 }
649
650 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
651 length = -1;
652 }
653 }
654
edisonn@google.com571c70b2013-07-10 17:09:50 +0000655 if (length < 0) {
656 // scan the buffer, until we find first endstream
657 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000658 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000659
660 if (endstream) {
661 length = endstream - start;
662 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000663 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000664 }
665 }
666 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000667 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000668
669 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
670 endstream += 2;
671 } else if (endstream[0] == kLF_PdfWhiteSpace) {
672 endstream += 1;
673 }
674
675 // TODO(edisonn): verify the next bytes are "endstream"
676
677 endstream += strlen("endstream");
678 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000679 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000680 return endstream;
681 }
682 return start;
683}
684
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000685static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkNativeParsedPDF* doc) {
686 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000687 // We already processed ID keyword, and we should be positioned immediately after it
688
689 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
690 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
691 start += 2;
692 } else if (start[0] == kLF_PdfWhiteSpace) {
693 start += 1;
694 } else if (isPdfWhiteSpace(start[0])) {
695 start += 1;
696 } else {
697 SkASSERT(isPdfDelimiter(start[0]));
698 // TODO(edisonn): warning?
699 }
700
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000701 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
702 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000703
704 if (endstream) {
705 int length = endstream - start;
706 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
707 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
708 inlineImage->addStream(start, (size_t)length);
709 } else {
710 // TODO(edisonn): report error in inline image stream (ID-EI) section
711 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
712 return end;
713 }
714 return endEI;
715}
716
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000717static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000718 if (allocator == NULL) {
719 // TODO(edisonn): report/warning error
720 return end;
721 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000722 TRACE_INDENT(level, "Dictionary");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000723 SkPdfObject::makeEmptyDictionary(dict);
724
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000725 start = skipPdfWhiteSpaces(level, start, end);
726 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000727
728 while (start < end && *start == kNamed_PdfDelimiter) {
729 SkPdfObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000730 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000731 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000732 start = readName(level + 1, start, end, &key, &tmpStorage);
733 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000734
735 if (start < end) {
736 SkPdfObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000737 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000738
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000739 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000740
741 if (start < end) {
742 // seems we have an indirect reference
743 if (isPdfDigit(*start)) {
744 SkPdfObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000745 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000746
747 SkPdfObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000748 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000749
750 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
751 int64_t id = value->intValue();
752 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000753 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000754 dict->set(&key, value);
755 } else {
756 // error, ignore
757 dict->set(&key, value);
758 }
759 } else {
760 // next elem is not a digit, but it might not be / either!
761 dict->set(&key, value);
762 }
763 } else {
764 // /key >>
765 dict->set(&key, value);
766 return end;
767 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000768 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000769 } else {
770 dict->set(&key, &SkPdfObject::kNull);
771 return end;
772 }
773 }
774
775 // TODO(edisonn): options to ignore these errors
776
777 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000778 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000779 if (*start != kClosedInequityBracket_PdfDelimiter) {
780 // TODO(edisonn): report/warning
781 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000782 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000783 start++; // skip >
784 if (*start != kClosedInequityBracket_PdfDelimiter) {
785 // TODO(edisonn): report/warning
786 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000787 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000788 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000789
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000790 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000791
792 return start;
793}
794
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000795const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
796 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000797
798 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000799 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000801 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000802
803 // no token, len would be 0
804 if (current == start) {
805 return NULL;
806 }
807
808 int tokenLen = current - start;
809
810 if (tokenLen == 1) {
811 // start array
812 switch (*start) {
813 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000814 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000815 SkPdfObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000816 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000817
818 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000819 //*start = '\0';
820 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000821
822 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000823 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000824 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000825 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000826 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000827 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000828 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000829 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000830 }
831
832 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000833 //*start = '\0';
834 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000835
836 // TODO(edisonn): what to do curly brackets? read spec!
837 case kOpenedCurlyBracket_PdfDelimiter:
838 default:
839 break;
840 }
841
842 SkASSERT(!isPdfWhiteSpace(*start));
843 if (isPdfDelimiter(*start)) {
844 // TODO(edisonn): how stream ] } > ) will be handled?
845 // for now ignore, and it will become a keyword to be ignored
846 }
847 }
848
849 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
850 SkPdfObject::makeNull(token);
851 return current;
852 }
853
854 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
855 SkPdfObject::makeBoolean(true, token);
856 return current;
857 }
858
edisonn@google.comf111a4b2013-07-31 18:22:36 +0000859 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000860 SkPdfObject::makeBoolean(false, token);
861 return current;
862 }
863
864 if (isPdfNumeric(*start)) {
865 SkPdfObject::makeNumeric(start, current, token);
866 } else {
867 SkPdfObject::makeKeyword(start, current, token);
868 }
869 return current;
870}
871
872SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000873 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000874 return new SkPdfObject[BUFFER_SIZE];
875}
876
877SkPdfAllocator::~SkPdfAllocator() {
878 for (int i = 0 ; i < fHandles.count(); i++) {
879 free(fHandles[i]);
880 }
881 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000882 for (int j = 0 ; j < BUFFER_SIZE; j++) {
883 fHistory[i][j].reset();
884 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000885 delete[] fHistory[i];
886 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000887 for (int j = 0 ; j < BUFFER_SIZE; j++) {
888 fCurrent[j].reset();
889 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000890 delete[] fCurrent;
891}
892
893SkPdfObject* SkPdfAllocator::allocObject() {
894 if (fCurrentUsed >= BUFFER_SIZE) {
895 fHistory.push(fCurrent);
896 fCurrent = allocBlock();
897 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000898 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000899 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000900 fCurrentUsed++;
901 return &fCurrent[fCurrentUsed - 1];
902}
903
904// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000905SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000906 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000907 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000908 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000909 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000910 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000911 if (endobj) {
912 len = endobj - (char*)buffer + strlen("endobj");
913 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000914 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000915 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000916}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000917
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000918SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000919 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000920 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000921 if (endobj) {
922 len = endobj - (char*)buffer + strlen("endobj");
923 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000924 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000925 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000926}
927
928SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000929}
930
931bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com91ce6982013-08-05 20:45:40 +0000932 SkPdfObject obj;
933#ifdef PDF_TRACE_READ_TOKEN
934 static int read_op = 0;
935 int last;
936#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000937 token->fKeyword = NULL;
938 token->fObject = NULL;
939
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000940 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000941 if (fUncompressedStream >= fUncompressedStreamEnd) {
942 return false;
943 }
944
edisonn@google.com91ce6982013-08-05 20:45:40 +0000945#ifdef PDF_TRACE_READ_TOKEN
946 printf("BEFORE the read: %i\n", read_op);
947 last = read_op;
948#endif
949
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000950 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com91ce6982013-08-05 20:45:40 +0000951#ifdef PDF_TRACE_READ_TOKEN
952 printf("BEFORE the read: %i\n", read_op);
953 if (last != read_op) {
954 printf("break; // memory override");
955 }
956#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000957
958 // If it is a keyword, we will only get the pointer of the string
959 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
960 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000961 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000962 token->fType = kKeyword_TokenType;
963 } else {
964 SkPdfObject* pobj = fAllocator->allocObject();
965 *pobj = obj;
966 token->fObject = pobj;
967 token->fType = kObject_TokenType;
968 }
969
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000970#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000971 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000972#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000973 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000974 printf("break;\n");
975 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000976#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000977 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
978#endif
979
980 return true;
981}
982
983void SkPdfNativeTokenizer::PutBack(PdfToken token) {
984 SkASSERT(!fHasPutBack);
985 fHasPutBack = true;
986 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000987#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000988 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
989#endif
990}
991
992bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
993 if (fHasPutBack) {
994 *token = fPutBack;
995 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000996#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000997 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
998#endif
999 return true;
1000 }
1001
1002 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001003#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +00001004 printf("EMPTY TOKENIZER\n");
1005#endif
1006 return false;
1007 }
1008
1009 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001010}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001011
1012#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1013
1014// keys
1015DECLARE_PDF_NAME(BitsPerComponent);
1016DECLARE_PDF_NAME(ColorSpace);
1017DECLARE_PDF_NAME(Decode);
1018DECLARE_PDF_NAME(DecodeParms);
1019DECLARE_PDF_NAME(Filter);
1020DECLARE_PDF_NAME(Height);
1021DECLARE_PDF_NAME(ImageMask);
1022DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
1023DECLARE_PDF_NAME(Interpolate);
1024DECLARE_PDF_NAME(Width);
1025
1026// values
1027DECLARE_PDF_NAME(DeviceGray);
1028DECLARE_PDF_NAME(DeviceRGB);
1029DECLARE_PDF_NAME(DeviceCMYK);
1030DECLARE_PDF_NAME(Indexed);
1031DECLARE_PDF_NAME(ASCIIHexDecode);
1032DECLARE_PDF_NAME(ASCII85Decode);
1033DECLARE_PDF_NAME(LZWDecode);
1034DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1035DECLARE_PDF_NAME(RunLengthDecode);
1036DECLARE_PDF_NAME(CCITTFaxDecode);
1037DECLARE_PDF_NAME(DCTDecode);
1038
1039#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1040
1041
1042static SkPdfObject* inlineImageKeyAbbreviationExpand(SkPdfObject* key) {
1043 if (!key || !key->isName()) {
1044 return key;
1045 }
1046
1047 // TODO(edisonn): use autogenerated code!
1048 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1049 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1050 HANDLE_NAME_ABBR(key, Decode, D);
1051 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1052 HANDLE_NAME_ABBR(key, Filter, F);
1053 HANDLE_NAME_ABBR(key, Height, H);
1054 HANDLE_NAME_ABBR(key, ImageMask, IM);
1055// HANDLE_NAME_ABBR(key, Intent, );
1056 HANDLE_NAME_ABBR(key, Interpolate, I);
1057 HANDLE_NAME_ABBR(key, Width, W);
1058
1059 return key;
1060}
1061
1062static SkPdfObject* inlineImageValueAbbreviationExpand(SkPdfObject* value) {
1063 if (!value || !value->isName()) {
1064 return value;
1065 }
1066
1067 // TODO(edisonn): use autogenerated code!
1068 HANDLE_NAME_ABBR(value, DeviceGray, G);
1069 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1070 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1071 HANDLE_NAME_ABBR(value, Indexed, I);
1072 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1073 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1074 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1075 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1076 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1077 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1078 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1079
1080 return value;
1081}
1082
1083SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1084 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001085 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001086 if (fUncompressedStream >= fUncompressedStreamEnd) {
1087 return NULL;
1088 }
1089
1090 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
1091 SkPdfObject::makeEmptyDictionary(inlineImage);
1092
1093 while (fUncompressedStream < fUncompressedStreamEnd) {
1094 SkPdfObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001095 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001096
edisonn@google.come878e722013-07-29 19:10:58 +00001097 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001098 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001099 return inlineImage;
1100 } else {
1101 SkPdfObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001102 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001103 // TODO(edisonn): perf maybe we should not expand abreviation like this
1104 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1105 inlineImageValueAbbreviationExpand(obj));
1106 }
1107 }
1108 // TODO(edisonn): report end of data with inline image without an EI
1109 return inlineImage;
1110}