blob: 178ca6c9f6c1a73cb0f812cec0f1783b5d5be73f [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +00003#include "SkPdfNativeObject.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00004#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com33f11b62013-08-14 21:35:27 +00006// TODO(edisonn): mac builder does not find the header ... but from headers is ok
7//#include "SkPdfStreamCommonDictionary_autogen.h"
8//#include "SkPdfImageDictionary_autogen.h"
9#include "SkPdfHeaders_autogen.h"
10
edisonn@google.com78b38b12013-07-15 18:20:58 +000011
12// TODO(edisonn): perf!!!
13// there could be 0s between start and end! but not in the needle.
14static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
15 int needleLen = strlen(needle);
16 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
17 strncmp(hayStart, needle, needleLen) == 0) {
18 return hayStart;
19 }
20
21 hayStart++;
22
23 while (hayStart < hayEnd) {
24 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
25 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
26 strncmp(hayStart, needle, needleLen) == 0) {
27 return hayStart;
28 }
29 hayStart++;
30 }
31 return NULL;
32}
33
edisonn@google.come2e01ff2013-08-02 20:24:48 +000034#ifdef PDF_TRACE_TOKENIZER
// Starts a new trace record: prints a newline, a running record id, the record `type`,
// and then one indentation unit per nesting `level`.
static void TRACE_INDENT(int level, const char* type) {
    // Incremented on every call, so each trace record gets a distinct id.
    static int id = 0;
    ++id;
#if 0
    // Debugging hook: put a breakpoint here to stop at a specific record.
    if (478613 == id) {
        printf("break;\n");
    }
#endif
    // all types should have 2 letters, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    for (int remaining = level; remaining > 0; --remaining) {
        printf(" ");
    }
}
edisonn@google.com3aac1f92013-07-02 22:42:53 +000049
// Echoes one byte of skipped white space / comment text to the trace output.
static void TRACE_COMMENT(char ch) {
    putchar(ch);
}
53
// Echoes one byte of the token currently being scanned to the trace output.
static void TRACE_TK(char ch) {
    putchar(ch);
}
57
// Prints the bytes of a decoded name, [start, end), followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        putchar(*cursor);
    }
    putchar('\n');
}
65
// Prints the bytes of a decoded literal string, [start, end), followed by a newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        putchar(*cursor);
    }
    putchar('\n');
}
73
// Prints the bytes of a decoded hex string, [start, end), followed by a newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        putchar(*cursor);
    }
    putchar('\n');
}
81
82#else
83#define TRACE_INDENT(level,type)
84#define TRACE_COMMENT(ch)
85#define TRACE_TK(ch)
86#define TRACE_NAME(start,end)
87#define TRACE_STRING(start,end)
88#define TRACE_HEXSTRING(start,end)
89#endif
90
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000091const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000092 TRACE_INDENT(level, "White Space");
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000093 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000094 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000095 if (*start == kComment_PdfDelimiter) {
96 // skip the comment until end of line
97 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000100 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000101 }
102 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000103 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000104 start++;
105 }
106 }
107 return start;
108}
109
110// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000111const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000112 //int opened brackets
113 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000114 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115
116 SkASSERT(!isPdfWhiteSpace(*start));
117
118 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000119 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000120 start++;
121 return start;
122 }
123
124 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000125 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126 start++;
127 }
128 return start;
129}
130
edisonn@google.com571c70b2013-07-10 17:09:50 +0000131// last elem has to be ]
edisonn@google.com3aa35552013-08-14 18:26:20 +0000132static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* array, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000133 if (allocator == NULL) {
134 // TODO(edisonn): report/warning error
135 return end;
136 }
137
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000138 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139 while (start < end) {
140 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000141 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000142
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000143 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000144
145 if (endOfToken == start) {
146 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
147 return start;
148 }
149
150 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
151 return endOfToken;
152 }
153
edisonn@google.com3aa35552013-08-14 18:26:20 +0000154 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000155 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000156 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
157 // we are sure they are not references!
158 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000159 SkPdfNativeObject* gen = array->removeLastInArray();
160 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000161 newObj->reset();
edisonn@google.com3aa35552013-08-14 18:26:20 +0000162 SkPdfNativeObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000163 }
164 array->appendInArray(newObj);
165 }
166 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000167 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
168 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000169 return start;
170}
171
// When we read strings we will rewrite the string, so we will reuse the memory.
// When we start to read the string, the opening bracket has already been consumed.

// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new
// buffer, or whether we can reuse the one we have.
176
177static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
178 TRACE_INDENT(level, "String");
179 const unsigned char* in = start;
180 bool hasOut = (out != NULL);
181
182 int openRoundBrackets = 1;
183 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000184 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
185 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000186 if (openRoundBrackets == 0) {
187 in++; // consumed )
188 break;
189 }
190
edisonn@google.com571c70b2013-07-10 17:09:50 +0000191 if (*in == kEscape_PdfSpecial) {
192 if (in + 1 < end) {
193 switch (in[1]) {
194 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000195 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000196 out++;
197 in += 2;
198 break;
199
200 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000201 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000202 out++;
203 in += 2;
204 break;
205
206 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000207 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000208 out++;
209 in += 2;
210 break;
211
212 case 'b':
213 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000214 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000215 out++;
216 in += 2;
217 break;
218
219 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000220 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000221 out++;
222 in += 2;
223 break;
224
225 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000226 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000227 out++;
228 in += 2;
229 break;
230
231 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000232 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000233 out++;
234 in += 2;
235 break;
236
237 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000238 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000239 out++;
240 in += 2;
241 break;
242
243 case '0':
244 case '1':
245 case '2':
246 case '3':
247 case '4':
248 case '5':
249 case '6':
250 case '7': {
251 //read octals
252 in++; // consume backslash
253
254 int code = 0;
255 int i = 0;
256 while (in < end && *in >= '0' && *in < '8') {
257 code = (code << 3) + ((*in) - '0'); // code * 8 + d
258 i++;
259 in++;
260 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000261 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000262 out++;
263 i = 0;
264 }
265 }
266 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000267 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000268 out++;
269 }
270 }
271 break;
272
273 default:
274 // Per spec, backslash is ignored is escaped ch is unknown
275 in++;
276 break;
277 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000278 } else {
279 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000280 }
281 } else {
282 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
283 // we could have one look that first just inc current, and when we find the backslash
284 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000285 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000286 in++;
287 out++;
288 }
289 }
290
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000291 if (hasOut) {
292 return in; // consumed already ) at the end of the string
293 } else {
294 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
295 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000296}
297
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000298static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
299 return readString(level, start, end, NULL) - start;
300}
301
edisonn@google.com3aa35552013-08-14 18:26:20 +0000302static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000303 if (!allocator) {
304 return end;
305 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000306 int outLength = readStringLength(level, start, end);
307 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
308 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
309 start = readString(level, start, end, out);
edisonn@google.com3aa35552013-08-14 18:26:20 +0000310 SkPdfNativeObject::makeString(out, out + outLength, str);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000311 TRACE_STRING(out, out + outLength);
312 return start; // consumed already ) at the end of the string
313}
314
315static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
316 TRACE_INDENT(level, "HexString");
317 bool hasOut = (out != NULL);
318 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000319
320 unsigned char code = 0;
321
322 while (in < end) {
323 while (in < end && isPdfWhiteSpace(*in)) {
324 in++;
325 }
326
327 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000328 //*in = '\0';
329 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000330 // normal exit
331 break;
332 }
333
334 if (in >= end) {
335 // end too soon
336 break;
337 }
338
339 switch (*in) {
340 case '0':
341 case '1':
342 case '2':
343 case '3':
344 case '4':
345 case '5':
346 case '6':
347 case '7':
348 case '8':
349 case '9':
350 code = (*in - '0') << 4;
351 break;
352
353 case 'a':
354 case 'b':
355 case 'c':
356 case 'd':
357 case 'e':
358 case 'f':
359 code = (*in - 'a' + 10) << 4;
360 break;
361
362 case 'A':
363 case 'B':
364 case 'C':
365 case 'D':
366 case 'E':
367 case 'F':
368 code = (*in - 'A' + 10) << 4;
369 break;
370
371 // TODO(edisonn): spec does not say how to handle this error
372 default:
373 break;
374 }
375
376 in++; // advance
377
378 while (in < end && isPdfWhiteSpace(*in)) {
379 in++;
380 }
381
382 // TODO(edisonn): report error
383 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000384 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000385 out++;
386 break;
387 }
388
389 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000390 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000391 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000392 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000393 break;
394 }
395
396 switch (*in) {
397 case '0':
398 case '1':
399 case '2':
400 case '3':
401 case '4':
402 case '5':
403 case '6':
404 case '7':
405 case '8':
406 case '9':
407 code += (*in - '0');
408 break;
409
410 case 'a':
411 case 'b':
412 case 'c':
413 case 'd':
414 case 'e':
415 case 'f':
416 code += (*in - 'a' + 10);
417 break;
418
419 case 'A':
420 case 'B':
421 case 'C':
422 case 'D':
423 case 'E':
424 case 'F':
425 code += (*in - 'A' + 10);
426 break;
427
428 // TODO(edisonn): spec does not say how to handle this error
429 default:
430 break;
431 }
432
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000433 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000434 out++;
435 in++;
436 }
437
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000438 if (hasOut) {
439 return in; // consumed already > at the end of the string
440 } else {
441 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000442 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000443}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000444
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000445static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
446 return readHexString(level, start, end, NULL) - start;
447}
448
edisonn@google.com3aa35552013-08-14 18:26:20 +0000449static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000450 if (!allocator) {
451 return end;
452 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000453 int outLength = readHexStringLength(level, start, end);
454 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
455 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
456 start = readHexString(level, start, end, out);
edisonn@google.com3aa35552013-08-14 18:26:20 +0000457 SkPdfNativeObject::makeHexString(out, out + outLength, str);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000458 TRACE_HEXSTRING(out, out + outLength);
459 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000460}
461
462// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000463static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
464 TRACE_INDENT(level, "Name");
465 bool hasOut = (out != NULL);
466 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000467
468 unsigned char code = 0;
469
470 while (in < end) {
471 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
472 break;
473 }
474
475 if (*in == '#' && in + 2 < end) {
476 in++;
477 switch (*in) {
478 case '0':
479 case '1':
480 case '2':
481 case '3':
482 case '4':
483 case '5':
484 case '6':
485 case '7':
486 case '8':
487 case '9':
488 code = (*in - '0') << 4;
489 break;
490
491 case 'a':
492 case 'b':
493 case 'c':
494 case 'd':
495 case 'e':
496 case 'f':
497 code = (*in - 'a' + 10) << 4;
498 break;
499
500 case 'A':
501 case 'B':
502 case 'C':
503 case 'D':
504 case 'E':
505 case 'F':
506 code = (*in - 'A' + 10) << 4;
507 break;
508
509 // TODO(edisonn): spec does not say how to handle this error
510 default:
511 break;
512 }
513
514 in++; // advance
515
516 switch (*in) {
517 case '0':
518 case '1':
519 case '2':
520 case '3':
521 case '4':
522 case '5':
523 case '6':
524 case '7':
525 case '8':
526 case '9':
527 code += (*in - '0');
528 break;
529
530 case 'a':
531 case 'b':
532 case 'c':
533 case 'd':
534 case 'e':
535 case 'f':
536 code += (*in - 'a' + 10);
537 break;
538
539 case 'A':
540 case 'B':
541 case 'C':
542 case 'D':
543 case 'E':
544 case 'F':
545 code += (*in - 'A' + 10);
546 break;
547
548 // TODO(edisonn): spec does not say how to handle this error
549 default:
550 break;
551 }
552
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000553 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000554 out++;
555 in++;
556 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000557 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000558 out++;
559 in++;
560 }
561 }
562
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000563 if (hasOut) {
564 return in;
565 } else {
566 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
567 }
568}
569
570static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
571 return readName(level, start, end, NULL) - start;
572}
573
edisonn@google.com3aa35552013-08-14 18:26:20 +0000574static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000575 if (!allocator) {
576 return end;
577 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000578 int outLength = readNameLength(level, start, end);
579 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
580 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
581 start = readName(level, start, end, out);
edisonn@google.com3aa35552013-08-14 18:26:20 +0000582 SkPdfNativeObject::makeName(out, out + outLength, name);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000583 TRACE_NAME(out, out + outLength);
584 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000585}
586
// TODO(edisonn): the PDF spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains endstream, together
// with a reference object with the length, but the real length object would be somewhere else.
// It could confuse the parser.
591/*example:
592
5937 0 obj
594<< /length 8 0 R>>
595stream
596...............
597endstream
5988 0 obj #we are in stream actually, not a real object
599<< 10 >> #we are in stream actually, not a real object
600endobj
601endstream
6028 0 obj #real obj
603<< 100 >> #real obj
604endobj
605and it could get worse, with multiple object like this
606*/
607
608// right now implement the silly algorithm that assumes endstream is finishing the stream
609
610
edisonn@google.com3aa35552013-08-14 18:26:20 +0000611static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000612 TRACE_INDENT(level, "Stream");
613 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000614 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
615 // no stream. return.
616 return start;
617 }
618
619 start += 6; // strlen("stream")
620 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
621 start += 2;
622 } else if (start[0] == kLF_PdfWhiteSpace) {
623 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000624 } else if (isPdfWhiteSpace(start[0])) {
625 start += 1;
626 } else {
627 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
628 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000629 }
630
631 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
632 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000633 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000634
635 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000636 if (stream->has_Length() && stream->Length(doc) > 0) {
637 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000638 }
639
640 // TODO(edisonn): laod external streams
641 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
642
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000643
644 if (length >= 0) {
645 const unsigned char* endstream = start + length;
646
647 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
648 endstream += 2;
649 } else if (endstream[0] == kLF_PdfWhiteSpace) {
650 endstream += 1;
651 }
652
653 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
654 length = -1;
655 }
656 }
657
edisonn@google.com571c70b2013-07-10 17:09:50 +0000658 if (length < 0) {
659 // scan the buffer, until we find first endstream
660 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000661 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000662
663 if (endstream) {
664 length = endstream - start;
665 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000666 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000667 }
668 }
669 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000670 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000671
672 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
673 endstream += 2;
674 } else if (endstream[0] == kLF_PdfWhiteSpace) {
675 endstream += 1;
676 }
677
678 // TODO(edisonn): verify the next bytes are "endstream"
679
680 endstream += strlen("endstream");
681 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000682 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000683 return endstream;
684 }
685 return start;
686}
687
edisonn@google.com3aa35552013-08-14 18:26:20 +0000688static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000689 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000690 // We already processed ID keyword, and we should be positioned immediately after it
691
692 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
693 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
694 start += 2;
695 } else if (start[0] == kLF_PdfWhiteSpace) {
696 start += 1;
697 } else if (isPdfWhiteSpace(start[0])) {
698 start += 1;
699 } else {
700 SkASSERT(isPdfDelimiter(start[0]));
701 // TODO(edisonn): warning?
702 }
703
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000704 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
705 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000706
707 if (endstream) {
708 int length = endstream - start;
709 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
710 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
711 inlineImage->addStream(start, (size_t)length);
712 } else {
713 // TODO(edisonn): report error in inline image stream (ID-EI) section
714 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
715 return end;
716 }
717 return endEI;
718}
719
edisonn@google.com3aa35552013-08-14 18:26:20 +0000720static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000721 if (allocator == NULL) {
722 // TODO(edisonn): report/warning error
723 return end;
724 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000725 TRACE_INDENT(level, "Dictionary");
edisonn@google.com3aa35552013-08-14 18:26:20 +0000726 SkPdfNativeObject::makeEmptyDictionary(dict);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000727
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000728 start = skipPdfWhiteSpaces(level, start, end);
729 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000730
731 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000732 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000733 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000734 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000735 start = readName(level + 1, start, end, &key, &tmpStorage);
736 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000737
738 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000739 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000740 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000741
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000742 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743
744 if (start < end) {
745 // seems we have an indirect reference
746 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000747 SkPdfNativeObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000748 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000749
edisonn@google.com3aa35552013-08-14 18:26:20 +0000750 SkPdfNativeObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000751 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000752
753 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
754 int64_t id = value->intValue();
755 value->reset();
edisonn@google.com3aa35552013-08-14 18:26:20 +0000756 SkPdfNativeObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000757 dict->set(&key, value);
758 } else {
759 // error, ignore
760 dict->set(&key, value);
761 }
762 } else {
763 // next elem is not a digit, but it might not be / either!
764 dict->set(&key, value);
765 }
766 } else {
767 // /key >>
768 dict->set(&key, value);
769 return end;
770 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000771 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000772 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000773 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000774 return end;
775 }
776 }
777
778 // TODO(edisonn): options to ignore these errors
779
780 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000781 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000782 if (*start != kClosedInequityBracket_PdfDelimiter) {
783 // TODO(edisonn): report/warning
784 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000785 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000786 start++; // skip >
787 if (*start != kClosedInequityBracket_PdfDelimiter) {
788 // TODO(edisonn): report/warning
789 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000790 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000791 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000792
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000793 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000794
795 return start;
796}
797
edisonn@google.com3aa35552013-08-14 18:26:20 +0000798const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000799 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800
801 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000802 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000803
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000804 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000805
806 // no token, len would be 0
807 if (current == start) {
808 return NULL;
809 }
810
811 int tokenLen = current - start;
812
813 if (tokenLen == 1) {
814 // start array
815 switch (*start) {
816 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000817 //*start = '\0';
edisonn@google.com3aa35552013-08-14 18:26:20 +0000818 SkPdfNativeObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000819 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000820
821 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000822 //*start = '\0';
823 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000824
825 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000826 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000827 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000828 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000829 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000830 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000831 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000832 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000833 }
834
835 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000836 //*start = '\0';
837 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000838
839 // TODO(edisonn): what to do curly brackets? read spec!
840 case kOpenedCurlyBracket_PdfDelimiter:
841 default:
842 break;
843 }
844
845 SkASSERT(!isPdfWhiteSpace(*start));
846 if (isPdfDelimiter(*start)) {
847 // TODO(edisonn): how stream ] } > ) will be handled?
848 // for now ignore, and it will become a keyword to be ignored
849 }
850 }
851
852 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000853 SkPdfNativeObject::makeNull(token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000854 return current;
855 }
856
857 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000858 SkPdfNativeObject::makeBoolean(true, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000859 return current;
860 }
861
edisonn@google.comf111a4b2013-07-31 18:22:36 +0000862 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000863 SkPdfNativeObject::makeBoolean(false, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000864 return current;
865 }
866
867 if (isPdfNumeric(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000868 SkPdfNativeObject::makeNumeric(start, current, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000869 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000870 SkPdfNativeObject::makeKeyword(start, current, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000871 }
872 return current;
873}
874
edisonn@google.com3aa35552013-08-14 18:26:20 +0000875SkPdfNativeObject* SkPdfAllocator::allocBlock() {
876 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
877 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000878}
879
880SkPdfAllocator::~SkPdfAllocator() {
881 for (int i = 0 ; i < fHandles.count(); i++) {
882 free(fHandles[i]);
883 }
884 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000885 for (int j = 0 ; j < BUFFER_SIZE; j++) {
886 fHistory[i][j].reset();
887 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000888 delete[] fHistory[i];
889 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000890 for (int j = 0 ; j < BUFFER_SIZE; j++) {
891 fCurrent[j].reset();
892 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000893 delete[] fCurrent;
894}
895
edisonn@google.com3aa35552013-08-14 18:26:20 +0000896SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000897 if (fCurrentUsed >= BUFFER_SIZE) {
898 fHistory.push(fCurrent);
899 fCurrent = allocBlock();
900 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000901 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000902 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000903 fCurrentUsed++;
904 return &fCurrent[fCurrentUsed - 1];
905}
906
// TODO(edisonn): perf: do not copy the buffers; use them in place and cache the result, so there is no need for a second pass
edisonn@google.com33f11b62013-08-14 21:35:27 +0000908SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000909 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000910 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000911 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000912 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000913 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000914 if (endobj) {
915 len = endobj - (char*)buffer + strlen("endobj");
916 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000917 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000918 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000919}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000920
edisonn@google.com33f11b62013-08-14 21:35:27 +0000921SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000922 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000923 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000924 if (endobj) {
925 len = endobj - (char*)buffer + strlen("endobj");
926 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000927 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000928 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000929}
930
931SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000932}
933
934bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000935 SkPdfNativeObject obj;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000936#ifdef PDF_TRACE_READ_TOKEN
937 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000938#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000939 token->fKeyword = NULL;
940 token->fObject = NULL;
941
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000942 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000943 if (fUncompressedStream >= fUncompressedStreamEnd) {
944 return false;
945 }
946
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000947 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000948
949 // If it is a keyword, we will only get the pointer of the string
edisonn@google.com3aa35552013-08-14 18:26:20 +0000950 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000951 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000952 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000953 token->fType = kKeyword_TokenType;
954 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000955 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000956 *pobj = obj;
957 token->fObject = pobj;
958 token->fType = kObject_TokenType;
959 }
960
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000961#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000962 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000963#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000964 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000965 printf("break;\n");
966 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000967#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000968 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
969#endif
970
971 return true;
972}
973
974void SkPdfNativeTokenizer::PutBack(PdfToken token) {
975 SkASSERT(!fHasPutBack);
976 fHasPutBack = true;
977 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000978#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000979 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
980#endif
981}
982
983bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
984 if (fHasPutBack) {
985 *token = fPutBack;
986 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000987#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000988 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
989#endif
990 return true;
991 }
992
993 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000994#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000995 printf("EMPTY TOKENIZER\n");
996#endif
997 return false;
998 }
999
1000 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001001}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001002
1003#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1004
1005// keys
1006DECLARE_PDF_NAME(BitsPerComponent);
1007DECLARE_PDF_NAME(ColorSpace);
1008DECLARE_PDF_NAME(Decode);
1009DECLARE_PDF_NAME(DecodeParms);
1010DECLARE_PDF_NAME(Filter);
1011DECLARE_PDF_NAME(Height);
1012DECLARE_PDF_NAME(ImageMask);
1013DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
1014DECLARE_PDF_NAME(Interpolate);
1015DECLARE_PDF_NAME(Width);
1016
1017// values
1018DECLARE_PDF_NAME(DeviceGray);
1019DECLARE_PDF_NAME(DeviceRGB);
1020DECLARE_PDF_NAME(DeviceCMYK);
1021DECLARE_PDF_NAME(Indexed);
1022DECLARE_PDF_NAME(ASCIIHexDecode);
1023DECLARE_PDF_NAME(ASCII85Decode);
1024DECLARE_PDF_NAME(LZWDecode);
1025DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1026DECLARE_PDF_NAME(RunLengthDecode);
1027DECLARE_PDF_NAME(CCITTFaxDecode);
1028DECLARE_PDF_NAME(DCTDecode);
1029
1030#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1031
1032
edisonn@google.com3aa35552013-08-14 18:26:20 +00001033static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001034 if (!key || !key->isName()) {
1035 return key;
1036 }
1037
1038 // TODO(edisonn): use autogenerated code!
1039 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1040 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1041 HANDLE_NAME_ABBR(key, Decode, D);
1042 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1043 HANDLE_NAME_ABBR(key, Filter, F);
1044 HANDLE_NAME_ABBR(key, Height, H);
1045 HANDLE_NAME_ABBR(key, ImageMask, IM);
1046// HANDLE_NAME_ABBR(key, Intent, );
1047 HANDLE_NAME_ABBR(key, Interpolate, I);
1048 HANDLE_NAME_ABBR(key, Width, W);
1049
1050 return key;
1051}
1052
edisonn@google.com3aa35552013-08-14 18:26:20 +00001053static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001054 if (!value || !value->isName()) {
1055 return value;
1056 }
1057
1058 // TODO(edisonn): use autogenerated code!
1059 HANDLE_NAME_ABBR(value, DeviceGray, G);
1060 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1061 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1062 HANDLE_NAME_ABBR(value, Indexed, I);
1063 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1064 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1065 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1066 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1067 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1068 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1069 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1070
1071 return value;
1072}
1073
1074SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1075 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001076 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001077 if (fUncompressedStream >= fUncompressedStreamEnd) {
1078 return NULL;
1079 }
1080
1081 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.com3aa35552013-08-14 18:26:20 +00001082 SkPdfNativeObject::makeEmptyDictionary(inlineImage);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001083
1084 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001085 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001086 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001087
edisonn@google.come878e722013-07-29 19:10:58 +00001088 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001089 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001090 return inlineImage;
1091 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001092 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001093 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001094 // TODO(edisonn): perf maybe we should not expand abreviation like this
1095 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1096 inlineImageValueAbbreviationExpand(obj));
1097 }
1098 }
1099 // TODO(edisonn): report end of data with inline image without an EI
1100 return inlineImage;
1101}