blob: 79935bc954b9f800df6d8ffcc1a07ad3a4cb598a [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
edisonn@google.com571c70b2013-07-10 17:09:50 +00008#include "SkPdfConfig.h"
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00009#include "SkPdfNativeObject.h"
10#include "SkPdfNativeTokenizer.h"
11#include "SkPdfUtils.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000012
edisonn@google.com33f11b62013-08-14 21:35:27 +000013// TODO(edisonn): mac builder does not find the header ... but from headers is ok
14//#include "SkPdfStreamCommonDictionary_autogen.h"
15//#include "SkPdfImageDictionary_autogen.h"
16#include "SkPdfHeaders_autogen.h"
17
edisonn@google.com78b38b12013-07-15 18:20:58 +000018
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000019// TODO(edisonn): Perf, Make this function run faster.
20// There could be 0s between start and end.
21// needle will not contain 0s.
edisonn@google.com78b38b12013-07-15 18:20:58 +000022static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
23 int needleLen = strlen(needle);
24 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
25 strncmp(hayStart, needle, needleLen) == 0) {
26 return hayStart;
27 }
28
29 hayStart++;
30
31 while (hayStart < hayEnd) {
32 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000033 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) ||
34 (hayStart+needleLen == hayEnd)) &&
edisonn@google.com78b38b12013-07-15 18:20:58 +000035 strncmp(hayStart, needle, needleLen) == 0) {
36 return hayStart;
37 }
38 hayStart++;
39 }
40 return NULL;
41}
42
edisonn@google.com598cf5d2013-10-09 15:13:19 +000043const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000044 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000045 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000046 if (*start == kComment_PdfDelimiter) {
47 // skip the comment until end of line
48 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000049 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000050 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000051 }
52 } else {
edisonn@google.com571c70b2013-07-10 17:09:50 +000053 start++;
54 }
55 }
56 return start;
57}
58
edisonn@google.com598cf5d2013-10-09 15:13:19 +000059const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000060 SkASSERT(!isPdfWhiteSpace(*start));
61
62 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000063 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000064 start++;
65 return start;
66 }
67
68 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000069 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000070 start++;
71 }
72 return start;
73}
74
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000075// The parsing should end with a ].
76static const unsigned char* readArray(const unsigned char* start, const unsigned char* end,
77 SkPdfNativeObject* array,
78 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +000079 SkPdfNativeObject::makeEmptyArray(array);
80 // PUT_TRACK_STREAM(array, start, start)
edisonn@google.combca421b2013-09-05 20:00:21 +000081
edisonn@google.com1f080162013-07-23 21:05:49 +000082 if (allocator == NULL) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000083 // TODO(edisonn): report/warning error/assert
edisonn@google.com1f080162013-07-23 21:05:49 +000084 return end;
85 }
86
edisonn@google.com571c70b2013-07-10 17:09:50 +000087 while (start < end) {
88 // skip white spaces
edisonn@google.com598cf5d2013-10-09 15:13:19 +000089 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000090
edisonn@google.com598cf5d2013-10-09 15:13:19 +000091 const unsigned char* endOfToken = endOfPdfToken(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000092
93 if (endOfToken == start) {
94 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
95 return start;
96 }
97
98 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
99 return endOfToken;
100 }
101
edisonn@google.com3aa35552013-08-14 18:26:20 +0000102 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000103 start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000104 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array
105 // only when we are sure they are not references!
106 if (newObj->isKeywordReference() && array->size() >= 2 &&
107 array->objAtAIndex(array->size() - 1)->isInteger() &&
108 array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000109 SkPdfNativeObject* gen = array->removeLastInArray();
110 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.combca421b2013-09-05 20:00:21 +0000111
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000112 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(),
113 (unsigned int)gen->intValue(), newObj);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000114 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115 }
116 array->appendInArray(newObj);
117 }
118 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000119 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
120 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121 return start;
122}
123
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000124static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
125 unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000126 const unsigned char* in = start;
127 bool hasOut = (out != NULL);
128
129 int openRoundBrackets = 1;
130 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000131 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
132 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000133 if (openRoundBrackets == 0) {
134 in++; // consumed )
135 break;
136 }
137
edisonn@google.com571c70b2013-07-10 17:09:50 +0000138 if (*in == kEscape_PdfSpecial) {
139 if (in + 1 < end) {
140 switch (in[1]) {
141 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000142 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000143 out++;
144 in += 2;
145 break;
146
147 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000148 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000149 out++;
150 in += 2;
151 break;
152
153 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000154 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000155 out++;
156 in += 2;
157 break;
158
159 case 'b':
160 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000161 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000162 out++;
163 in += 2;
164 break;
165
166 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000167 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000168 out++;
169 in += 2;
170 break;
171
172 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000173 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000174 out++;
175 in += 2;
176 break;
177
178 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000179 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000180 out++;
181 in += 2;
182 break;
183
184 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000185 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000186 out++;
187 in += 2;
188 break;
189
190 case '0':
191 case '1':
192 case '2':
193 case '3':
194 case '4':
195 case '5':
196 case '6':
197 case '7': {
198 //read octals
199 in++; // consume backslash
200
201 int code = 0;
202 int i = 0;
203 while (in < end && *in >= '0' && *in < '8') {
204 code = (code << 3) + ((*in) - '0'); // code * 8 + d
205 i++;
206 in++;
207 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000208 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000209 out++;
210 i = 0;
211 }
212 }
213 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000214 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000215 out++;
216 }
217 }
218 break;
219
220 default:
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000221 // Per spec, backslash is ignored if escaped ch is unknown
edisonn@google.com571c70b2013-07-10 17:09:50 +0000222 in++;
223 break;
224 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000225 } else {
226 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000227 }
228 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000229 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000230 in++;
231 out++;
232 }
233 }
234
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000235 if (hasOut) {
236 return in; // consumed already ) at the end of the string
237 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000238 // return where the string would end if we reuse the string
239 return start + (out - (const unsigned char*)NULL);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000240 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000241}
242
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000243static int readStringLength(const unsigned char* start, const unsigned char* end) {
244 return readString(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000245}
246
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000247static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
248 SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000249 if (!allocator) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000250 // TODO(edisonn): report error/warn/assert
edisonn@google.comb44334c2013-07-23 20:47:05 +0000251 return end;
252 }
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000253
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000254 int outLength = readStringLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000255 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000256 const unsigned char* now = readString(start, end, out);
257 SkPdfNativeObject::makeString(out, out + outLength, str);
258 // PUT_TRACK_STREAM(str, start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000259 TRACE_STRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000260 return now; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000261}
262
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000263static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end,
264 unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000265 bool hasOut = (out != NULL);
266 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000267
268 unsigned char code = 0;
269
270 while (in < end) {
271 while (in < end && isPdfWhiteSpace(*in)) {
272 in++;
273 }
274
275 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000276 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000277 // normal exit
278 break;
279 }
280
281 if (in >= end) {
282 // end too soon
283 break;
284 }
285
286 switch (*in) {
287 case '0':
288 case '1':
289 case '2':
290 case '3':
291 case '4':
292 case '5':
293 case '6':
294 case '7':
295 case '8':
296 case '9':
297 code = (*in - '0') << 4;
298 break;
299
300 case 'a':
301 case 'b':
302 case 'c':
303 case 'd':
304 case 'e':
305 case 'f':
306 code = (*in - 'a' + 10) << 4;
307 break;
308
309 case 'A':
310 case 'B':
311 case 'C':
312 case 'D':
313 case 'E':
314 case 'F':
315 code = (*in - 'A' + 10) << 4;
316 break;
317
318 // TODO(edisonn): spec does not say how to handle this error
319 default:
320 break;
321 }
322
323 in++; // advance
324
325 while (in < end && isPdfWhiteSpace(*in)) {
326 in++;
327 }
328
329 // TODO(edisonn): report error
330 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000331 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000332 out++;
333 break;
334 }
335
336 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000337 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000338 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000339 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000340 break;
341 }
342
343 switch (*in) {
344 case '0':
345 case '1':
346 case '2':
347 case '3':
348 case '4':
349 case '5':
350 case '6':
351 case '7':
352 case '8':
353 case '9':
354 code += (*in - '0');
355 break;
356
357 case 'a':
358 case 'b':
359 case 'c':
360 case 'd':
361 case 'e':
362 case 'f':
363 code += (*in - 'a' + 10);
364 break;
365
366 case 'A':
367 case 'B':
368 case 'C':
369 case 'D':
370 case 'E':
371 case 'F':
372 code += (*in - 'A' + 10);
373 break;
374
375 // TODO(edisonn): spec does not say how to handle this error
376 default:
377 break;
378 }
379
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000380 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000381 out++;
382 in++;
383 }
384
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000385 if (hasOut) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000386 return in; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000387 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000388 // return where the string would end if we reuse the string
389 return start + (out - (const unsigned char*)NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000390 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000391}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000392
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000393static int readHexStringLength(const unsigned char* start, const unsigned char* end) {
394 return readHexString(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000395}
396
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000397static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000398 if (!allocator) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000399 // TODO(edisonn): report error/warn/assert
edisonn@google.comb44334c2013-07-23 20:47:05 +0000400 return end;
401 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000402 int outLength = readHexStringLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000403 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000404 const unsigned char* now = readHexString(start, end, out);
405 SkPdfNativeObject::makeHexString(out, out + outLength, str);
406 // str PUT_TRACK_STREAM(start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000407 TRACE_HEXSTRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000408 return now; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000409}
410
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000411// TODO(edisonn): add version parameter, before PDF 1.2 name could not have special characters.
412static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
413 unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000414 bool hasOut = (out != NULL);
415 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000416
417 unsigned char code = 0;
418
419 while (in < end) {
420 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
421 break;
422 }
423
424 if (*in == '#' && in + 2 < end) {
425 in++;
426 switch (*in) {
427 case '0':
428 case '1':
429 case '2':
430 case '3':
431 case '4':
432 case '5':
433 case '6':
434 case '7':
435 case '8':
436 case '9':
437 code = (*in - '0') << 4;
438 break;
439
440 case 'a':
441 case 'b':
442 case 'c':
443 case 'd':
444 case 'e':
445 case 'f':
446 code = (*in - 'a' + 10) << 4;
447 break;
448
449 case 'A':
450 case 'B':
451 case 'C':
452 case 'D':
453 case 'E':
454 case 'F':
455 code = (*in - 'A' + 10) << 4;
456 break;
457
458 // TODO(edisonn): spec does not say how to handle this error
459 default:
460 break;
461 }
462
463 in++; // advance
464
465 switch (*in) {
466 case '0':
467 case '1':
468 case '2':
469 case '3':
470 case '4':
471 case '5':
472 case '6':
473 case '7':
474 case '8':
475 case '9':
476 code += (*in - '0');
477 break;
478
479 case 'a':
480 case 'b':
481 case 'c':
482 case 'd':
483 case 'e':
484 case 'f':
485 code += (*in - 'a' + 10);
486 break;
487
488 case 'A':
489 case 'B':
490 case 'C':
491 case 'D':
492 case 'E':
493 case 'F':
494 code += (*in - 'A' + 10);
495 break;
496
497 // TODO(edisonn): spec does not say how to handle this error
498 default:
499 break;
500 }
501
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000502 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000503 out++;
504 in++;
505 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000506 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000507 out++;
508 in++;
509 }
510 }
511
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000512 if (hasOut) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000513 return in; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000514 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000515 // return where the string would end if we reuse the string
516 return start + (out - (const unsigned char*)NULL);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000517 }
518}
519
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000520static int readNameLength(const unsigned char* start, const unsigned char* end) {
521 return readName(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000522}
523
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000524static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
525 SkPdfNativeObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000526 if (!allocator) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000527 // TODO(edisonn): report error/warn/assert
edisonn@google.comb44334c2013-07-23 20:47:05 +0000528 return end;
529 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000530 int outLength = readNameLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000531 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000532 const unsigned char* now = readName(start, end, out);
533 SkPdfNativeObject::makeName(out, out + outLength, name);
534 //PUT_TRACK_STREAM(start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000535 TRACE_NAME(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000536 return now;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000537}
538
// TODO(edisonn): the PDF spec allows Length to be an indirect object defined after the stream.
// That makes for an interesting scenario: the stream data itself may contain "endstream",
// together with an object that looks like the referenced length, while the real length
// object lives somewhere else. This could confuse the parser.
543/*example:
544
5457 0 obj
546<< /length 8 0 R>>
547stream
548...............
549endstream
5508 0 obj #we are in stream actually, not a real object
551<< 10 >> #we are in stream actually, not a real object
552endobj
553endstream
5548 0 obj #real obj
555<< 100 >> #real obj
556endobj
557and it could get worse, with multiple object like this
558*/
559
560// right now implement the silly algorithm that assumes endstream is finishing the stream
561
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000562static const unsigned char* readStream(const unsigned char* start, const unsigned char* end,
563 SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000564 start = skipPdfWhiteSpaces(start, end);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000565 if (!( start[0] == 's' &&
566 start[1] == 't' &&
567 start[2] == 'r' &&
568 start[3] == 'e' &&
569 start[4] == 'a' &&
570 start[5] == 'm')) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000571 // no stream. return.
572 return start;
573 }
574
575 start += 6; // strlen("stream")
576 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
577 start += 2;
578 } else if (start[0] == kLF_PdfWhiteSpace) {
579 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000580 } else if (isPdfWhiteSpace(start[0])) {
581 start += 1;
582 } else {
583 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000584 }
585
586 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
587 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000588 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000589
590 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000591 if (stream->has_Length() && stream->Length(doc) > 0) {
592 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000593 }
594
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000595 // TODO(edisonn): load external streams
596 // TODO(edisonn): look at the last filter, to determine how to deal with possible parsing
597 // issues. The last filter can have special rules to terminate a stream, which we could
598 // use to determine end of stream.
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000599
600 if (length >= 0) {
601 const unsigned char* endstream = start + length;
602
603 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
604 endstream += 2;
605 } else if (endstream[0] == kLF_PdfWhiteSpace) {
606 endstream += 1;
607 }
608
609 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
610 length = -1;
611 }
612 }
613
edisonn@google.com571c70b2013-07-10 17:09:50 +0000614 if (length < 0) {
615 // scan the buffer, until we find first endstream
616 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000617 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end,
618 "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000619
620 if (endstream) {
621 length = endstream - start;
622 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000623 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000624 }
625 }
626 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000627 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000628
629 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
630 endstream += 2;
631 } else if (endstream[0] == kLF_PdfWhiteSpace) {
632 endstream += 1;
633 }
634
635 // TODO(edisonn): verify the next bytes are "endstream"
636
637 endstream += strlen("endstream");
638 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000639 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000640 return endstream;
641 }
642 return start;
643}
644
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000645static const unsigned char* readInlineImageStream(const unsigned char* start,
646 const unsigned char* end,
647 SkPdfImageDictionary* inlineImage,
648 SkPdfNativeDoc* doc) {
edisonn@google.com78b38b12013-07-15 18:20:58 +0000649 // We already processed ID keyword, and we should be positioned immediately after it
650
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000651 // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end,
652 // instead of this if.
653 //if (end - start <= 2) {
654 // // TODO(edisonn): warning?
655 // return end; // but can we have a pixel image encoded in 1-2 bytes?
656 //}
657
edisonn@google.com78b38b12013-07-15 18:20:58 +0000658 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
659 start += 2;
660 } else if (start[0] == kLF_PdfWhiteSpace) {
661 start += 1;
662 } else if (isPdfWhiteSpace(start[0])) {
663 start += 1;
664 } else {
665 SkASSERT(isPdfDelimiter(start[0]));
666 // TODO(edisonn): warning?
667 }
668
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000669 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
670 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000671
672 if (endstream) {
673 int length = endstream - start;
674 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
675 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
676 inlineImage->addStream(start, (size_t)length);
677 } else {
678 // TODO(edisonn): report error in inline image stream (ID-EI) section
679 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
680 return end;
681 }
682 return endEI;
683}
684
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000685static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end,
686 SkPdfNativeObject* dict,
687 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000688 if (allocator == NULL) {
689 // TODO(edisonn): report/warning error
690 return end;
691 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000692 SkPdfNativeObject::makeEmptyDictionary(dict);
693 // PUT_TRACK_STREAM(dict, start, start)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000694
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000695 start = skipPdfWhiteSpaces(start, end);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000696 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000697
698 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000699 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000700 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000701 start++;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000702 start = readName(start, end, &key, &tmpStorage);
703 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000704
705 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000706 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000707 start = nextObject(start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000708
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000709 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000710
711 if (start < end) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000712 // We should have an indirect reference
edisonn@google.com571c70b2013-07-10 17:09:50 +0000713 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000714 SkPdfNativeObject generation;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000715 start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000716
edisonn@google.com3aa35552013-08-14 18:26:20 +0000717 SkPdfNativeObject keywordR;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000718 start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000719
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000720 if (value->isInteger() && generation.isInteger() &&
721 keywordR.isKeywordReference()) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000722 int64_t id = value->intValue();
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000723 SkPdfNativeObject::resetAndMakeReference(
724 (unsigned int)id,
725 (unsigned int)generation.intValue(),
726 value);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000727 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000728 dict->set(&key, value);
729 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000730 // TODO(edisonn) error?, ignore it for now.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000731 dict->set(&key, value);
732 }
733 } else {
734 // next elem is not a digit, but it might not be / either!
735 dict->set(&key, value);
736 }
737 } else {
738 // /key >>
739 dict->set(&key, value);
740 return end;
741 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000742 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000744 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000745 return end;
746 }
747 }
748
edisonn@google.com571c70b2013-07-10 17:09:50 +0000749 // now we should expect >>
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000750 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000751 if (*start != kClosedInequityBracket_PdfDelimiter) {
752 // TODO(edisonn): report/warning
753 }
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000754
edisonn@google.com78b38b12013-07-15 18:20:58 +0000755 start++; // skip >
756 if (*start != kClosedInequityBracket_PdfDelimiter) {
757 // TODO(edisonn): report/warning
758 }
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000759
edisonn@google.com78b38b12013-07-15 18:20:58 +0000760 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000761
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000762 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
edisonn@google.combca421b2013-09-05 20:00:21 +0000763
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000764 start = readStream(start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000765
766 return start;
767}
768
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000769const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
770 SkPdfNativeObject* token,
771 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000772 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000773
774 // skip white spaces
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000775 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000776
edisonn@google.comaf54a512013-09-13 19:33:42 +0000777 if (start >= end) {
778 return end;
779 }
780
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000781 current = endOfPdfToken(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000782
783 // no token, len would be 0
edisonn@google.comaf54a512013-09-13 19:33:42 +0000784 if (current == start || current == end) {
785 return end;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000786 }
787
788 int tokenLen = current - start;
789
790 if (tokenLen == 1) {
791 // start array
792 switch (*start) {
793 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000794 return readArray(current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000795
796 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000797 return readString(start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000798
799 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
801 // TODO(edisonn): pass here the length somehow?
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000802 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000803 } else {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000804 return readHexString(start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000805 }
806
807 case kNamed_PdfDelimiter:
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000808 return readName(start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000809
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000810 // TODO(edisonn): what to do curly brackets?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000811 case kOpenedCurlyBracket_PdfDelimiter:
812 default:
813 break;
814 }
815
816 SkASSERT(!isPdfWhiteSpace(*start));
817 if (isPdfDelimiter(*start)) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000818 // TODO(edisonn): how unexpected stream ] } > ) will be handled?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000819 // for now ignore, and it will become a keyword to be ignored
820 }
821 }
822
823 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000824 SkPdfNativeObject::makeNull(token);
825 // PUT_TRACK_STREAM(start, start + 4)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000826 return current;
827 }
828
829 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000830 SkPdfNativeObject::makeBoolean(true, token);
831 // PUT_TRACK_STREAM(start, start + 4)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000832 return current;
833 }
834
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000835 // TODO(edisonn): again, make all buffers have 5 extra bytes
836 if (tokenLen == 5 && start[0] == 'f' &&
837 start[1] == 'a' &&
838 start[2] == 'l' &&
839 start[3] == 's' &&
840 start[4] == 'e') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000841 SkPdfNativeObject::makeBoolean(false, token);
842 // PUT_TRACK_STREAM(start, start + 5)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000843 return current;
844 }
845
846 if (isPdfNumeric(*start)) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000847 SkPdfNativeObject::makeNumeric(start, current, token);
848 // PUT_TRACK_STREAM(start, current)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000849 } else {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000850 SkPdfNativeObject::makeKeyword(start, current, token);
851 // PUT_TRACK_STREAM(start, current)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000852 }
853 return current;
854}
855
edisonn@google.com3aa35552013-08-14 18:26:20 +0000856SkPdfNativeObject* SkPdfAllocator::allocBlock() {
857 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
858 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000859}
860
861SkPdfAllocator::~SkPdfAllocator() {
862 for (int i = 0 ; i < fHandles.count(); i++) {
863 free(fHandles[i]);
864 }
865 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000866 for (int j = 0 ; j < BUFFER_SIZE; j++) {
867 fHistory[i][j].reset();
868 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000869 delete[] fHistory[i];
870 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000871 for (int j = 0 ; j < BUFFER_SIZE; j++) {
872 fCurrent[j].reset();
873 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000874 delete[] fCurrent;
875}
876
edisonn@google.com3aa35552013-08-14 18:26:20 +0000877SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000878 if (fCurrentUsed >= BUFFER_SIZE) {
879 fHistory.push(fCurrent);
880 fCurrent = allocBlock();
881 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000882 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000883 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000884 fCurrentUsed++;
885 return &fCurrent[fCurrentUsed - 1];
886}
887
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000888// TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache the result,
889// so there is no need of a second pass
890SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
891 SkPdfAllocator* allocator,
892 SkPdfNativeDoc* doc)
893 : fDoc(doc)
894 , fAllocator(allocator)
895 , fUncompressedStream(NULL)
896 , fUncompressedStreamEnd(NULL)
897 , fEmpty(false)
898 , fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000899 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000900 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000901 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000902 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
903 // we need to do now for perf, and our generated pdfs do not have comments,
904 // but we need to remove this hack for pdfs in the wild
edisonn@google.com78b38b12013-07-15 18:20:58 +0000905 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000906 if (endobj) {
907 len = endobj - (char*)buffer + strlen("endobj");
908 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000909 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000910 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000911}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000912
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000913SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len,
914 SkPdfAllocator* allocator,
915 SkPdfNativeDoc* doc) : fDoc(doc)
916 , fAllocator(allocator)
917 , fEmpty(false)
918 , fHasPutBack(false) {
919 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
920 // we need to do now for perf, and our generated pdfs do not have comments,
921 // but we need to remove this hack for pdfs in the wild
edisonn@google.com78b38b12013-07-15 18:20:58 +0000922 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000923 if (endobj) {
924 len = endobj - (char*)buffer + strlen("endobj");
925 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000926 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000927 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000928}
929
930SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000931}
932
933bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com91ce6982013-08-05 20:45:40 +0000934#ifdef PDF_TRACE_READ_TOKEN
935 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000936#endif
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000937
edisonn@google.com571c70b2013-07-10 17:09:50 +0000938 token->fKeyword = NULL;
939 token->fObject = NULL;
940
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000941 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000942 if (fUncompressedStream >= fUncompressedStreamEnd) {
943 return false;
944 }
945
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000946 SkPdfNativeObject obj;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000947 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
948 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000949
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000950 // If it is a keyword, we will only get the pointer of the string.
edisonn@google.com3aa35552013-08-14 18:26:20 +0000951 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000952 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000953 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000954 token->fType = kKeyword_TokenType;
955 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000956 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000957 *pobj = obj;
958 token->fObject = pobj;
959 token->fType = kObject_TokenType;
960 }
961
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000962#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000963 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000964#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000965 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000966 printf("break;\n");
967 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000968#endif
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000969 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object",
970 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
971 token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000972#endif
973
974 return true;
975}
976
977void SkPdfNativeTokenizer::PutBack(PdfToken token) {
978 SkASSERT(!fHasPutBack);
979 fHasPutBack = true;
980 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000981#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000982 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object",
983 token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str() :
984 token.fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000985#endif
986}
987
988bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
989 if (fHasPutBack) {
990 *token = fPutBack;
991 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000992#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000993 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object",
994 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
995 token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000996#endif
997 return true;
998 }
999
1000 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001001#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +00001002 printf("EMPTY TOKENIZER\n");
1003#endif
1004 return false;
1005 }
1006
1007 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001008}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001009
1010#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1011
1012// keys
1013DECLARE_PDF_NAME(BitsPerComponent);
1014DECLARE_PDF_NAME(ColorSpace);
1015DECLARE_PDF_NAME(Decode);
1016DECLARE_PDF_NAME(DecodeParms);
1017DECLARE_PDF_NAME(Filter);
1018DECLARE_PDF_NAME(Height);
1019DECLARE_PDF_NAME(ImageMask);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001020DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abBreviations?
edisonn@google.com78b38b12013-07-15 18:20:58 +00001021DECLARE_PDF_NAME(Interpolate);
1022DECLARE_PDF_NAME(Width);
1023
1024// values
1025DECLARE_PDF_NAME(DeviceGray);
1026DECLARE_PDF_NAME(DeviceRGB);
1027DECLARE_PDF_NAME(DeviceCMYK);
1028DECLARE_PDF_NAME(Indexed);
1029DECLARE_PDF_NAME(ASCIIHexDecode);
1030DECLARE_PDF_NAME(ASCII85Decode);
1031DECLARE_PDF_NAME(LZWDecode);
1032DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1033DECLARE_PDF_NAME(RunLengthDecode);
1034DECLARE_PDF_NAME(CCITTFaxDecode);
1035DECLARE_PDF_NAME(DCTDecode);
1036
1037#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1038
1039
edisonn@google.com3aa35552013-08-14 18:26:20 +00001040static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001041 if (!key || !key->isName()) {
1042 return key;
1043 }
1044
1045 // TODO(edisonn): use autogenerated code!
1046 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1047 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1048 HANDLE_NAME_ABBR(key, Decode, D);
1049 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1050 HANDLE_NAME_ABBR(key, Filter, F);
1051 HANDLE_NAME_ABBR(key, Height, H);
1052 HANDLE_NAME_ABBR(key, ImageMask, IM);
1053// HANDLE_NAME_ABBR(key, Intent, );
1054 HANDLE_NAME_ABBR(key, Interpolate, I);
1055 HANDLE_NAME_ABBR(key, Width, W);
1056
1057 return key;
1058}
1059
edisonn@google.com3aa35552013-08-14 18:26:20 +00001060static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001061 if (!value || !value->isName()) {
1062 return value;
1063 }
1064
1065 // TODO(edisonn): use autogenerated code!
1066 HANDLE_NAME_ABBR(value, DeviceGray, G);
1067 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1068 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1069 HANDLE_NAME_ABBR(value, Indexed, I);
1070 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1071 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1072 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1073 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1074 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1075 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1076 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1077
1078 return value;
1079}
1080
1081SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1082 // BI already processed
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001083 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001084 if (fUncompressedStream >= fUncompressedStreamEnd) {
1085 return NULL;
1086 }
1087
1088 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001089 SkPdfNativeObject::makeEmptyDictionary(inlineImage);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001090 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart,
1091 // fUncompressedStream - fUncompressedStreamStart)
edisonn@google.com78b38b12013-07-15 18:20:58 +00001092
1093 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001094 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001095 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key,
1096 fAllocator, fDoc);
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001097 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
edisonn@google.com78b38b12013-07-15 18:20:58 +00001098
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001099 if (key->isKeyword() && key->lenstr() == 2 &&
1100 key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
1101 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd,
1102 inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001103 return inlineImage;
1104 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001105 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001106 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj,
1107 fAllocator, fDoc);
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001108 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001109 // TODO(edisonn): perf maybe we should not expand abBreviation like this
edisonn@google.com78b38b12013-07-15 18:20:58 +00001110 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1111 inlineImageValueAbbreviationExpand(obj));
1112 }
1113 }
1114 // TODO(edisonn): report end of data with inline image without an EI
1115 return inlineImage;
1116}