blob: 42654ffa72c8fa9e7660595c6755fe6893e0b938 [file] [log] [blame]
/*
 * Copyright 2013 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
8#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +00009#include "SkPdfNativeObject.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +000010#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000011
edisonn@google.com33f11b62013-08-14 21:35:27 +000012// TODO(edisonn): mac builder does not find the header ... but from headers is ok
13//#include "SkPdfStreamCommonDictionary_autogen.h"
14//#include "SkPdfImageDictionary_autogen.h"
15#include "SkPdfHeaders_autogen.h"
16
edisonn@google.com78b38b12013-07-15 18:20:58 +000017
18// TODO(edisonn): perf!!!
19// there could be 0s between start and end! but not in the needle.
20static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
21 int needleLen = strlen(needle);
22 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26
27 hayStart++;
28
29 while (hayStart < hayEnd) {
30 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
31 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
32 strncmp(hayStart, needle, needleLen) == 0) {
33 return hayStart;
34 }
35 hayStart++;
36 }
37 return NULL;
38}
39
edisonn@google.come2e01ff2013-08-02 20:24:48 +000040#ifdef PDF_TRACE_TOKENIZER
// Starts a new trace line: prints a running sequence number and the token
// type, followed by one space per nesting level.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;  // counts every call, across the whole run
    ++id;
#if 0
    // Debugging hook: put a breakpoint on the printf to stop at a given call.
    if (478613 == id) {
        printf("break;\n");
    }
#endif
    // the type column has a fixed width, so the text is aligned nicely
    printf("\n%10i %15s: ", id, type);
    for (int left = level; left > 0; left--) {
        printf(" ");
    }
}
edisonn@google.com3aac1f92013-07-02 22:42:53 +000055
// Echoes one byte of skipped white space / comment text to stdout.
static void TRACE_COMMENT(char ch) {
    putchar(ch);
}
59
// Echoes one byte of the token being scanned to stdout.
static void TRACE_TK(char ch) {
    fputc(ch, stdout);
}
63
// Prints the bytes of a decoded name, [start, end), followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cur = start; cur < end; ++cur) {
        putchar(*cur);
    }
    printf("\n");
}
71
// Prints the bytes of a decoded literal string, [start, end), followed by a
// newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    const unsigned char* cur = start;
    while (cur < end) {
        fputc(*cur++, stdout);
    }
    printf("\n");
}
79
// Prints the bytes of a decoded hex string, [start, end), followed by a
// newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (; start < end; start++) {
        putchar(*start);
    }
    printf("\n");
}
87
88#else
// Tracing is compiled out: every TRACE_* hook expands to nothing, so the
// arguments at the call sites are never evaluated.
#define TRACE_INDENT(level, type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start, end)
#define TRACE_STRING(start, end)
#define TRACE_HEXSTRING(start, end)
95#endif
96
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000097const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 TRACE_INDENT(level, "White Space");
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000099 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000100 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000101 if (*start == kComment_PdfDelimiter) {
102 // skip the comment until end of line
103 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000104 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000105 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000106 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 }
108 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110 start++;
111 }
112 }
113 return start;
114}
115
116// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000117const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000118 //int opened brackets
119 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121
122 SkASSERT(!isPdfWhiteSpace(*start));
123
124 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000125 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126 start++;
127 return start;
128 }
129
130 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000131 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132 start++;
133 }
134 return start;
135}
136
edisonn@google.com571c70b2013-07-10 17:09:50 +0000137// last elem has to be ]
edisonn@google.com3aa35552013-08-14 18:26:20 +0000138static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* array, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000139 if (allocator == NULL) {
140 // TODO(edisonn): report/warning error
141 return end;
142 }
143
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000144 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000145 while (start < end) {
146 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000147 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000148
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000149 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000150
151 if (endOfToken == start) {
152 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
153 return start;
154 }
155
156 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
157 return endOfToken;
158 }
159
edisonn@google.com3aa35552013-08-14 18:26:20 +0000160 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000161 start = nextObject(level + 1, start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000162 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
163 // we are sure they are not references!
164 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000165 SkPdfNativeObject* gen = array->removeLastInArray();
166 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000167 newObj->reset();
edisonn@google.com3aa35552013-08-14 18:26:20 +0000168 SkPdfNativeObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000169 }
170 array->appendInArray(newObj);
171 }
172 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000173 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
174 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000175 return start;
176}
177
// When we read strings we rewrite the string, so that we can reuse the memory.
// When we start to read the string, the opening bracket has already been consumed.

// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer, or we can reuse the one we have
182
183static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
184 TRACE_INDENT(level, "String");
185 const unsigned char* in = start;
186 bool hasOut = (out != NULL);
187
188 int openRoundBrackets = 1;
189 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000190 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
191 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000192 if (openRoundBrackets == 0) {
193 in++; // consumed )
194 break;
195 }
196
edisonn@google.com571c70b2013-07-10 17:09:50 +0000197 if (*in == kEscape_PdfSpecial) {
198 if (in + 1 < end) {
199 switch (in[1]) {
200 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000201 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000202 out++;
203 in += 2;
204 break;
205
206 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000207 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000208 out++;
209 in += 2;
210 break;
211
212 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000213 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000214 out++;
215 in += 2;
216 break;
217
218 case 'b':
219 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000220 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000221 out++;
222 in += 2;
223 break;
224
225 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000226 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000227 out++;
228 in += 2;
229 break;
230
231 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000232 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000233 out++;
234 in += 2;
235 break;
236
237 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000238 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000239 out++;
240 in += 2;
241 break;
242
243 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000244 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000245 out++;
246 in += 2;
247 break;
248
249 case '0':
250 case '1':
251 case '2':
252 case '3':
253 case '4':
254 case '5':
255 case '6':
256 case '7': {
257 //read octals
258 in++; // consume backslash
259
260 int code = 0;
261 int i = 0;
262 while (in < end && *in >= '0' && *in < '8') {
263 code = (code << 3) + ((*in) - '0'); // code * 8 + d
264 i++;
265 in++;
266 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000267 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000268 out++;
269 i = 0;
270 }
271 }
272 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000273 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000274 out++;
275 }
276 }
277 break;
278
279 default:
280 // Per spec, backslash is ignored is escaped ch is unknown
281 in++;
282 break;
283 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000284 } else {
285 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000286 }
287 } else {
288 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
289 // we could have one look that first just inc current, and when we find the backslash
290 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000291 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000292 in++;
293 out++;
294 }
295 }
296
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000297 if (hasOut) {
298 return in; // consumed already ) at the end of the string
299 } else {
300 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
301 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000302}
303
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000304static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
305 return readString(level, start, end, NULL) - start;
306}
307
edisonn@google.com3aa35552013-08-14 18:26:20 +0000308static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000309 if (!allocator) {
310 return end;
311 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000312 int outLength = readStringLength(level, start, end);
313 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
314 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
315 start = readString(level, start, end, out);
edisonn@google.com3aa35552013-08-14 18:26:20 +0000316 SkPdfNativeObject::makeString(out, out + outLength, str);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000317 TRACE_STRING(out, out + outLength);
318 return start; // consumed already ) at the end of the string
319}
320
321static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
322 TRACE_INDENT(level, "HexString");
323 bool hasOut = (out != NULL);
324 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000325
326 unsigned char code = 0;
327
328 while (in < end) {
329 while (in < end && isPdfWhiteSpace(*in)) {
330 in++;
331 }
332
333 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000334 //*in = '\0';
335 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000336 // normal exit
337 break;
338 }
339
340 if (in >= end) {
341 // end too soon
342 break;
343 }
344
345 switch (*in) {
346 case '0':
347 case '1':
348 case '2':
349 case '3':
350 case '4':
351 case '5':
352 case '6':
353 case '7':
354 case '8':
355 case '9':
356 code = (*in - '0') << 4;
357 break;
358
359 case 'a':
360 case 'b':
361 case 'c':
362 case 'd':
363 case 'e':
364 case 'f':
365 code = (*in - 'a' + 10) << 4;
366 break;
367
368 case 'A':
369 case 'B':
370 case 'C':
371 case 'D':
372 case 'E':
373 case 'F':
374 code = (*in - 'A' + 10) << 4;
375 break;
376
377 // TODO(edisonn): spec does not say how to handle this error
378 default:
379 break;
380 }
381
382 in++; // advance
383
384 while (in < end && isPdfWhiteSpace(*in)) {
385 in++;
386 }
387
388 // TODO(edisonn): report error
389 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000390 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000391 out++;
392 break;
393 }
394
395 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000396 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000397 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000398 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000399 break;
400 }
401
402 switch (*in) {
403 case '0':
404 case '1':
405 case '2':
406 case '3':
407 case '4':
408 case '5':
409 case '6':
410 case '7':
411 case '8':
412 case '9':
413 code += (*in - '0');
414 break;
415
416 case 'a':
417 case 'b':
418 case 'c':
419 case 'd':
420 case 'e':
421 case 'f':
422 code += (*in - 'a' + 10);
423 break;
424
425 case 'A':
426 case 'B':
427 case 'C':
428 case 'D':
429 case 'E':
430 case 'F':
431 code += (*in - 'A' + 10);
432 break;
433
434 // TODO(edisonn): spec does not say how to handle this error
435 default:
436 break;
437 }
438
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000439 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000440 out++;
441 in++;
442 }
443
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000444 if (hasOut) {
445 return in; // consumed already > at the end of the string
446 } else {
447 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000448 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000449}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000450
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000451static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
452 return readHexString(level, start, end, NULL) - start;
453}
454
edisonn@google.com3aa35552013-08-14 18:26:20 +0000455static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000456 if (!allocator) {
457 return end;
458 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000459 int outLength = readHexStringLength(level, start, end);
460 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
461 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
462 start = readHexString(level, start, end, out);
edisonn@google.com3aa35552013-08-14 18:26:20 +0000463 SkPdfNativeObject::makeHexString(out, out + outLength, str);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000464 TRACE_HEXSTRING(out, out + outLength);
465 return start; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000466}
467
468// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000469static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
470 TRACE_INDENT(level, "Name");
471 bool hasOut = (out != NULL);
472 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000473
474 unsigned char code = 0;
475
476 while (in < end) {
477 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
478 break;
479 }
480
481 if (*in == '#' && in + 2 < end) {
482 in++;
483 switch (*in) {
484 case '0':
485 case '1':
486 case '2':
487 case '3':
488 case '4':
489 case '5':
490 case '6':
491 case '7':
492 case '8':
493 case '9':
494 code = (*in - '0') << 4;
495 break;
496
497 case 'a':
498 case 'b':
499 case 'c':
500 case 'd':
501 case 'e':
502 case 'f':
503 code = (*in - 'a' + 10) << 4;
504 break;
505
506 case 'A':
507 case 'B':
508 case 'C':
509 case 'D':
510 case 'E':
511 case 'F':
512 code = (*in - 'A' + 10) << 4;
513 break;
514
515 // TODO(edisonn): spec does not say how to handle this error
516 default:
517 break;
518 }
519
520 in++; // advance
521
522 switch (*in) {
523 case '0':
524 case '1':
525 case '2':
526 case '3':
527 case '4':
528 case '5':
529 case '6':
530 case '7':
531 case '8':
532 case '9':
533 code += (*in - '0');
534 break;
535
536 case 'a':
537 case 'b':
538 case 'c':
539 case 'd':
540 case 'e':
541 case 'f':
542 code += (*in - 'a' + 10);
543 break;
544
545 case 'A':
546 case 'B':
547 case 'C':
548 case 'D':
549 case 'E':
550 case 'F':
551 code += (*in - 'A' + 10);
552 break;
553
554 // TODO(edisonn): spec does not say how to handle this error
555 default:
556 break;
557 }
558
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000559 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000560 out++;
561 in++;
562 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000563 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000564 out++;
565 in++;
566 }
567 }
568
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000569 if (hasOut) {
570 return in;
571 } else {
572 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
573 }
574}
575
576static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
577 return readName(level, start, end, NULL) - start;
578}
579
edisonn@google.com3aa35552013-08-14 18:26:20 +0000580static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000581 if (!allocator) {
582 return end;
583 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000584 int outLength = readNameLength(level, start, end);
585 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
586 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
587 start = readName(level, start, end, out);
edisonn@google.com3aa35552013-08-14 18:26:20 +0000588 SkPdfNativeObject::makeName(out, out + outLength, name);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000589 TRACE_NAME(out, out + outLength);
590 return start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000591}
592
// TODO(edisonn): the pdf spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains endstream, together
// with a reference object with the length, but the real length object would be somewhere else.
// It could confuse the parser.
/*example:

7 0 obj
<< /length 8 0 R>>
stream
...............
endstream
8 0 obj #we are in stream actually, not a real object
<< 10 >> #we are in stream actually, not a real object
endobj
endstream
8 0 obj #real obj
<< 100 >> #real obj
endobj
and it could get worse, with multiple objects like this
*/

// right now implement the silly algorithm that assumes endstream is finishing the stream
615
616
edisonn@google.com3aa35552013-08-14 18:26:20 +0000617static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000618 TRACE_INDENT(level, "Stream");
619 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000620 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
621 // no stream. return.
622 return start;
623 }
624
625 start += 6; // strlen("stream")
626 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
627 start += 2;
628 } else if (start[0] == kLF_PdfWhiteSpace) {
629 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000630 } else if (isPdfWhiteSpace(start[0])) {
631 start += 1;
632 } else {
633 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
634 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000635 }
636
637 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
638 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000639 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000640
641 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000642 if (stream->has_Length() && stream->Length(doc) > 0) {
643 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000644 }
645
646 // TODO(edisonn): laod external streams
647 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
648
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000649
650 if (length >= 0) {
651 const unsigned char* endstream = start + length;
652
653 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
654 endstream += 2;
655 } else if (endstream[0] == kLF_PdfWhiteSpace) {
656 endstream += 1;
657 }
658
659 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
660 length = -1;
661 }
662 }
663
edisonn@google.com571c70b2013-07-10 17:09:50 +0000664 if (length < 0) {
665 // scan the buffer, until we find first endstream
666 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000667 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000668
669 if (endstream) {
670 length = endstream - start;
671 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000672 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000673 }
674 }
675 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000676 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000677
678 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
679 endstream += 2;
680 } else if (endstream[0] == kLF_PdfWhiteSpace) {
681 endstream += 1;
682 }
683
684 // TODO(edisonn): verify the next bytes are "endstream"
685
686 endstream += strlen("endstream");
687 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000688 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000689 return endstream;
690 }
691 return start;
692}
693
edisonn@google.com3aa35552013-08-14 18:26:20 +0000694static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000695 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000696 // We already processed ID keyword, and we should be positioned immediately after it
697
698 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
699 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
700 start += 2;
701 } else if (start[0] == kLF_PdfWhiteSpace) {
702 start += 1;
703 } else if (isPdfWhiteSpace(start[0])) {
704 start += 1;
705 } else {
706 SkASSERT(isPdfDelimiter(start[0]));
707 // TODO(edisonn): warning?
708 }
709
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000710 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
711 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000712
713 if (endstream) {
714 int length = endstream - start;
715 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
716 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
717 inlineImage->addStream(start, (size_t)length);
718 } else {
719 // TODO(edisonn): report error in inline image stream (ID-EI) section
720 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
721 return end;
722 }
723 return endEI;
724}
725
edisonn@google.com3aa35552013-08-14 18:26:20 +0000726static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000727 if (allocator == NULL) {
728 // TODO(edisonn): report/warning error
729 return end;
730 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000731 TRACE_INDENT(level, "Dictionary");
edisonn@google.com3aa35552013-08-14 18:26:20 +0000732 SkPdfNativeObject::makeEmptyDictionary(dict);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000733
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000734 start = skipPdfWhiteSpaces(level, start, end);
735 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000736
737 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000738 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000739 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000740 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000741 start = readName(level + 1, start, end, &key, &tmpStorage);
742 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743
744 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000745 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000746 start = nextObject(level + 1, start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000747
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000748 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000749
750 if (start < end) {
751 // seems we have an indirect reference
752 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000753 SkPdfNativeObject generation;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000754 start = nextObject(level + 1, start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000755
edisonn@google.com3aa35552013-08-14 18:26:20 +0000756 SkPdfNativeObject keywordR;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000757 start = nextObject(level + 1, start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000758
759 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
760 int64_t id = value->intValue();
761 value->reset();
edisonn@google.com3aa35552013-08-14 18:26:20 +0000762 SkPdfNativeObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000763 dict->set(&key, value);
764 } else {
765 // error, ignore
766 dict->set(&key, value);
767 }
768 } else {
769 // next elem is not a digit, but it might not be / either!
770 dict->set(&key, value);
771 }
772 } else {
773 // /key >>
774 dict->set(&key, value);
775 return end;
776 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000777 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000778 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000779 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000780 return end;
781 }
782 }
783
784 // TODO(edisonn): options to ignore these errors
785
786 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000787 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000788 if (*start != kClosedInequityBracket_PdfDelimiter) {
789 // TODO(edisonn): report/warning
790 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000791 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000792 start++; // skip >
793 if (*start != kClosedInequityBracket_PdfDelimiter) {
794 // TODO(edisonn): report/warning
795 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000796 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000797 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000798
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000799 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800
801 return start;
802}
803
edisonn@google.com3aa35552013-08-14 18:26:20 +0000804const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000805 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000806
807 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000808 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000809
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000810 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000811
812 // no token, len would be 0
813 if (current == start) {
814 return NULL;
815 }
816
817 int tokenLen = current - start;
818
819 if (tokenLen == 1) {
820 // start array
821 switch (*start) {
822 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000823 //*start = '\0';
edisonn@google.com3aa35552013-08-14 18:26:20 +0000824 SkPdfNativeObject::makeEmptyArray(token);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000825 return readArray(level + 1, current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000826
827 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000828 //*start = '\0';
829 return readString(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000830
831 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000832 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000833 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000834 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000835 // TODO(edisonn): pass here the length somehow?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000836 return readDictionary(level + 1, start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000837 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000838 return readHexString(level, start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000839 }
840
841 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000842 //*start = '\0';
843 return readName(level, start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000844
845 // TODO(edisonn): what to do curly brackets? read spec!
846 case kOpenedCurlyBracket_PdfDelimiter:
847 default:
848 break;
849 }
850
851 SkASSERT(!isPdfWhiteSpace(*start));
852 if (isPdfDelimiter(*start)) {
853 // TODO(edisonn): how stream ] } > ) will be handled?
854 // for now ignore, and it will become a keyword to be ignored
855 }
856 }
857
858 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000859 SkPdfNativeObject::makeNull(token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000860 return current;
861 }
862
863 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000864 SkPdfNativeObject::makeBoolean(true, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000865 return current;
866 }
867
edisonn@google.comf111a4b2013-07-31 18:22:36 +0000868 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000869 SkPdfNativeObject::makeBoolean(false, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000870 return current;
871 }
872
873 if (isPdfNumeric(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000874 SkPdfNativeObject::makeNumeric(start, current, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000875 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000876 SkPdfNativeObject::makeKeyword(start, current, token);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000877 }
878 return current;
879}
880
edisonn@google.com3aa35552013-08-14 18:26:20 +0000881SkPdfNativeObject* SkPdfAllocator::allocBlock() {
882 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
883 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000884}
885
886SkPdfAllocator::~SkPdfAllocator() {
887 for (int i = 0 ; i < fHandles.count(); i++) {
888 free(fHandles[i]);
889 }
890 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000891 for (int j = 0 ; j < BUFFER_SIZE; j++) {
892 fHistory[i][j].reset();
893 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000894 delete[] fHistory[i];
895 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000896 for (int j = 0 ; j < BUFFER_SIZE; j++) {
897 fCurrent[j].reset();
898 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000899 delete[] fCurrent;
900}
901
edisonn@google.com3aa35552013-08-14 18:26:20 +0000902SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000903 if (fCurrentUsed >= BUFFER_SIZE) {
904 fHistory.push(fCurrent);
905 fCurrent = allocBlock();
906 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000907 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000908 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000909 fCurrentUsed++;
910 return &fCurrent[fCurrentUsed - 1];
911}
912
// TODO(edisonn): perf: do not copy the buffers, but use them directly, and mark/cache the result, so there is no need for a second pass
edisonn@google.com33f11b62013-08-14 21:35:27 +0000914SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000915 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000916 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000917 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000918 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000919 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000920 if (endobj) {
921 len = endobj - (char*)buffer + strlen("endobj");
922 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000923 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000924 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000925}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000926
edisonn@google.com33f11b62013-08-14 21:35:27 +0000927SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000928 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000929 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000930 if (endobj) {
931 len = endobj - (char*)buffer + strlen("endobj");
932 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000933 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000934 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000935}
936
937SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000938}
939
940bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000941 SkPdfNativeObject obj;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000942#ifdef PDF_TRACE_READ_TOKEN
943 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000944#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000945 token->fKeyword = NULL;
946 token->fObject = NULL;
947
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000948 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000949 if (fUncompressedStream >= fUncompressedStreamEnd) {
950 return false;
951 }
952
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000953 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000954
955 // If it is a keyword, we will only get the pointer of the string
edisonn@google.com3aa35552013-08-14 18:26:20 +0000956 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000957 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000958 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000959 token->fType = kKeyword_TokenType;
960 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000961 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000962 *pobj = obj;
963 token->fObject = pobj;
964 token->fType = kObject_TokenType;
965 }
966
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000967#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000968 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000969#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000970 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000971 printf("break;\n");
972 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000973#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000974 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
975#endif
976
977 return true;
978}
979
980void SkPdfNativeTokenizer::PutBack(PdfToken token) {
981 SkASSERT(!fHasPutBack);
982 fHasPutBack = true;
983 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000984#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000985 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
986#endif
987}
988
989bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
990 if (fHasPutBack) {
991 *token = fPutBack;
992 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000993#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000994 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
995#endif
996 return true;
997 }
998
999 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001000#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +00001001 printf("EMPTY TOKENIZER\n");
1002#endif
1003 return false;
1004 }
1005
1006 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001007}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001008
1009#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1010
1011// keys
1012DECLARE_PDF_NAME(BitsPerComponent);
1013DECLARE_PDF_NAME(ColorSpace);
1014DECLARE_PDF_NAME(Decode);
1015DECLARE_PDF_NAME(DecodeParms);
1016DECLARE_PDF_NAME(Filter);
1017DECLARE_PDF_NAME(Height);
1018DECLARE_PDF_NAME(ImageMask);
1019DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
1020DECLARE_PDF_NAME(Interpolate);
1021DECLARE_PDF_NAME(Width);
1022
1023// values
1024DECLARE_PDF_NAME(DeviceGray);
1025DECLARE_PDF_NAME(DeviceRGB);
1026DECLARE_PDF_NAME(DeviceCMYK);
1027DECLARE_PDF_NAME(Indexed);
1028DECLARE_PDF_NAME(ASCIIHexDecode);
1029DECLARE_PDF_NAME(ASCII85Decode);
1030DECLARE_PDF_NAME(LZWDecode);
1031DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1032DECLARE_PDF_NAME(RunLengthDecode);
1033DECLARE_PDF_NAME(CCITTFaxDecode);
1034DECLARE_PDF_NAME(DCTDecode);
1035
// Statement macro: if obj is a name equal to the abbreviation shortName, return the address of
// the longName object from the enclosing function. It is wrapped in do { } while (0) so it acts
// as a single statement (safe inside if/else) and must be followed by a semicolon at the use
// site.
#define HANDLE_NAME_ABBR(obj,longName,shortName) \
    do { if ((obj)->isName(#shortName)) return &(longName); } while (0)
1037
1038
edisonn@google.com3aa35552013-08-14 18:26:20 +00001039static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001040 if (!key || !key->isName()) {
1041 return key;
1042 }
1043
1044 // TODO(edisonn): use autogenerated code!
1045 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1046 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1047 HANDLE_NAME_ABBR(key, Decode, D);
1048 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1049 HANDLE_NAME_ABBR(key, Filter, F);
1050 HANDLE_NAME_ABBR(key, Height, H);
1051 HANDLE_NAME_ABBR(key, ImageMask, IM);
1052// HANDLE_NAME_ABBR(key, Intent, );
1053 HANDLE_NAME_ABBR(key, Interpolate, I);
1054 HANDLE_NAME_ABBR(key, Width, W);
1055
1056 return key;
1057}
1058
edisonn@google.com3aa35552013-08-14 18:26:20 +00001059static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001060 if (!value || !value->isName()) {
1061 return value;
1062 }
1063
1064 // TODO(edisonn): use autogenerated code!
1065 HANDLE_NAME_ABBR(value, DeviceGray, G);
1066 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1067 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1068 HANDLE_NAME_ABBR(value, Indexed, I);
1069 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1070 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1071 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1072 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1073 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1074 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1075 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1076
1077 return value;
1078}
1079
1080SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1081 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001082 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001083 if (fUncompressedStream >= fUncompressedStreamEnd) {
1084 return NULL;
1085 }
1086
1087 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.com3aa35552013-08-14 18:26:20 +00001088 SkPdfNativeObject::makeEmptyDictionary(inlineImage);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001089
1090 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001091 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001092 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001093
edisonn@google.come878e722013-07-29 19:10:58 +00001094 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001095 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001096 return inlineImage;
1097 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001098 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001099 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001100 // TODO(edisonn): perf maybe we should not expand abreviation like this
1101 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1102 inlineImageValueAbbreviationExpand(obj));
1103 }
1104 }
1105 // TODO(edisonn): report end of data with inline image without an EI
1106 return inlineImage;
1107}