blob: 5dfe14e5fcbe588c7bc1800c78e963fcf16e63d4 [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
edisonn@google.com571c70b2013-07-10 17:09:50 +00008#include "SkPdfConfig.h"
scroggo@google.com5092adc2013-12-02 20:22:31 +00009#include "SkPdfDiffEncoder.h"
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000010#include "SkPdfNativeObject.h"
11#include "SkPdfNativeTokenizer.h"
12#include "SkPdfUtils.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000013
edisonn@google.com33f11b62013-08-14 21:35:27 +000014// TODO(edisonn): mac builder does not find the header ... but from headers is ok
15//#include "SkPdfStreamCommonDictionary_autogen.h"
16//#include "SkPdfImageDictionary_autogen.h"
17#include "SkPdfHeaders_autogen.h"
18
edisonn@google.com78b38b12013-07-15 18:20:58 +000019
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000020// TODO(edisonn): Perf, Make this function run faster.
21// There could be 0s between start and end.
22// needle will not contain 0s.
edisonn@google.com78b38b12013-07-15 18:20:58 +000023static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
24 int needleLen = strlen(needle);
25 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
26 strncmp(hayStart, needle, needleLen) == 0) {
27 return hayStart;
28 }
29
30 hayStart++;
31
32 while (hayStart < hayEnd) {
33 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000034 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) ||
35 (hayStart+needleLen == hayEnd)) &&
edisonn@google.com78b38b12013-07-15 18:20:58 +000036 strncmp(hayStart, needle, needleLen) == 0) {
37 return hayStart;
38 }
39 hayStart++;
40 }
41 return NULL;
42}
43
edisonn@google.com598cf5d2013-10-09 15:13:19 +000044const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000045 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000046 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000047 if (*start == kComment_PdfDelimiter) {
48 // skip the comment until end of line
49 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000050 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000051 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000052 }
53 } else {
edisonn@google.com571c70b2013-07-10 17:09:50 +000054 start++;
55 }
56 }
57 return start;
58}
59
edisonn@google.com598cf5d2013-10-09 15:13:19 +000060const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000061 SkASSERT(!isPdfWhiteSpace(*start));
62
63 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000064 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000065 start++;
66 return start;
67 }
68
69 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000070 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000071 start++;
72 }
73 return start;
74}
75
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000076// The parsing should end with a ].
77static const unsigned char* readArray(const unsigned char* start, const unsigned char* end,
78 SkPdfNativeObject* array,
79 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +000080 SkPdfNativeObject::makeEmptyArray(array);
81 // PUT_TRACK_STREAM(array, start, start)
edisonn@google.combca421b2013-09-05 20:00:21 +000082
edisonn@google.com1f080162013-07-23 21:05:49 +000083 if (allocator == NULL) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000084 // TODO(edisonn): report/warning error/assert
edisonn@google.com1f080162013-07-23 21:05:49 +000085 return end;
86 }
87
edisonn@google.com571c70b2013-07-10 17:09:50 +000088 while (start < end) {
89 // skip white spaces
edisonn@google.com598cf5d2013-10-09 15:13:19 +000090 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000091
edisonn@google.com598cf5d2013-10-09 15:13:19 +000092 const unsigned char* endOfToken = endOfPdfToken(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000093
94 if (endOfToken == start) {
95 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
96 return start;
97 }
98
99 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
100 return endOfToken;
101 }
102
edisonn@google.com3aa35552013-08-14 18:26:20 +0000103 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000104 start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000105 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array
106 // only when we are sure they are not references!
107 if (newObj->isKeywordReference() && array->size() >= 2 &&
108 array->objAtAIndex(array->size() - 1)->isInteger() &&
109 array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000110 SkPdfNativeObject* gen = array->removeLastInArray();
111 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.combca421b2013-09-05 20:00:21 +0000112
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000113 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(),
114 (unsigned int)gen->intValue(), newObj);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000115 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now
edisonn@google.com571c70b2013-07-10 17:09:50 +0000116 }
117 array->appendInArray(newObj);
118 }
119 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000120 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
121 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000122 return start;
123}
124
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000125static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
126 unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000127 const unsigned char* in = start;
128 bool hasOut = (out != NULL);
129
130 int openRoundBrackets = 1;
131 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
133 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000134 if (openRoundBrackets == 0) {
135 in++; // consumed )
136 break;
137 }
138
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139 if (*in == kEscape_PdfSpecial) {
140 if (in + 1 < end) {
141 switch (in[1]) {
142 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000143 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000144 out++;
145 in += 2;
146 break;
147
148 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000149 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000150 out++;
151 in += 2;
152 break;
153
154 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000155 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000156 out++;
157 in += 2;
158 break;
159
160 case 'b':
161 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000162 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000163 out++;
164 in += 2;
165 break;
166
167 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000168 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000169 out++;
170 in += 2;
171 break;
172
173 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000174 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000175 out++;
176 in += 2;
177 break;
178
179 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000180 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000181 out++;
182 in += 2;
183 break;
184
185 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000186 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000187 out++;
188 in += 2;
189 break;
190
191 case '0':
192 case '1':
193 case '2':
194 case '3':
195 case '4':
196 case '5':
197 case '6':
198 case '7': {
199 //read octals
200 in++; // consume backslash
201
202 int code = 0;
203 int i = 0;
204 while (in < end && *in >= '0' && *in < '8') {
205 code = (code << 3) + ((*in) - '0'); // code * 8 + d
206 i++;
207 in++;
208 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000209 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000210 out++;
211 i = 0;
212 }
213 }
214 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000215 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000216 out++;
217 }
218 }
219 break;
220
221 default:
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000222 // Per spec, backslash is ignored if escaped ch is unknown
edisonn@google.com571c70b2013-07-10 17:09:50 +0000223 in++;
224 break;
225 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000226 } else {
227 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000228 }
229 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000230 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000231 in++;
232 out++;
233 }
234 }
235
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000236 if (hasOut) {
237 return in; // consumed already ) at the end of the string
238 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000239 // return where the string would end if we reuse the string
240 return start + (out - (const unsigned char*)NULL);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000241 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000242}
243
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000244static int readStringLength(const unsigned char* start, const unsigned char* end) {
245 return readString(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000246}
247
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000248static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
249 SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000250 if (!allocator) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000251 // TODO(edisonn): report error/warn/assert
edisonn@google.comb44334c2013-07-23 20:47:05 +0000252 return end;
253 }
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000254
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000255 int outLength = readStringLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000256 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000257 const unsigned char* now = readString(start, end, out);
258 SkPdfNativeObject::makeString(out, out + outLength, str);
259 // PUT_TRACK_STREAM(str, start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000260 TRACE_STRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000261 return now; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000262}
263
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000264static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end,
265 unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000266 bool hasOut = (out != NULL);
267 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000268
269 unsigned char code = 0;
270
271 while (in < end) {
272 while (in < end && isPdfWhiteSpace(*in)) {
273 in++;
274 }
275
276 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000277 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000278 // normal exit
279 break;
280 }
281
282 if (in >= end) {
283 // end too soon
284 break;
285 }
286
287 switch (*in) {
288 case '0':
289 case '1':
290 case '2':
291 case '3':
292 case '4':
293 case '5':
294 case '6':
295 case '7':
296 case '8':
297 case '9':
298 code = (*in - '0') << 4;
299 break;
300
301 case 'a':
302 case 'b':
303 case 'c':
304 case 'd':
305 case 'e':
306 case 'f':
307 code = (*in - 'a' + 10) << 4;
308 break;
309
310 case 'A':
311 case 'B':
312 case 'C':
313 case 'D':
314 case 'E':
315 case 'F':
316 code = (*in - 'A' + 10) << 4;
317 break;
318
319 // TODO(edisonn): spec does not say how to handle this error
320 default:
321 break;
322 }
323
324 in++; // advance
325
326 while (in < end && isPdfWhiteSpace(*in)) {
327 in++;
328 }
329
330 // TODO(edisonn): report error
331 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000332 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000333 out++;
334 break;
335 }
336
337 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000338 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000339 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000340 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000341 break;
342 }
343
344 switch (*in) {
345 case '0':
346 case '1':
347 case '2':
348 case '3':
349 case '4':
350 case '5':
351 case '6':
352 case '7':
353 case '8':
354 case '9':
355 code += (*in - '0');
356 break;
357
358 case 'a':
359 case 'b':
360 case 'c':
361 case 'd':
362 case 'e':
363 case 'f':
364 code += (*in - 'a' + 10);
365 break;
366
367 case 'A':
368 case 'B':
369 case 'C':
370 case 'D':
371 case 'E':
372 case 'F':
373 code += (*in - 'A' + 10);
374 break;
375
376 // TODO(edisonn): spec does not say how to handle this error
377 default:
378 break;
379 }
380
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000381 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000382 out++;
383 in++;
384 }
385
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000386 if (hasOut) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000387 return in; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000388 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000389 // return where the string would end if we reuse the string
390 return start + (out - (const unsigned char*)NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000391 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000392}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000393
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000394static int readHexStringLength(const unsigned char* start, const unsigned char* end) {
395 return readHexString(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000396}
397
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000398static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000399 if (!allocator) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000400 // TODO(edisonn): report error/warn/assert
edisonn@google.comb44334c2013-07-23 20:47:05 +0000401 return end;
402 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000403 int outLength = readHexStringLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000404 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000405 const unsigned char* now = readHexString(start, end, out);
406 SkPdfNativeObject::makeHexString(out, out + outLength, str);
407 // str PUT_TRACK_STREAM(start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000408 TRACE_HEXSTRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000409 return now; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000410}
411
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000412// TODO(edisonn): add version parameter, before PDF 1.2 name could not have special characters.
413static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
414 unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000415 bool hasOut = (out != NULL);
416 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000417
418 unsigned char code = 0;
419
420 while (in < end) {
421 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
422 break;
423 }
424
425 if (*in == '#' && in + 2 < end) {
426 in++;
427 switch (*in) {
428 case '0':
429 case '1':
430 case '2':
431 case '3':
432 case '4':
433 case '5':
434 case '6':
435 case '7':
436 case '8':
437 case '9':
438 code = (*in - '0') << 4;
439 break;
440
441 case 'a':
442 case 'b':
443 case 'c':
444 case 'd':
445 case 'e':
446 case 'f':
447 code = (*in - 'a' + 10) << 4;
448 break;
449
450 case 'A':
451 case 'B':
452 case 'C':
453 case 'D':
454 case 'E':
455 case 'F':
456 code = (*in - 'A' + 10) << 4;
457 break;
458
459 // TODO(edisonn): spec does not say how to handle this error
460 default:
461 break;
462 }
463
464 in++; // advance
465
466 switch (*in) {
467 case '0':
468 case '1':
469 case '2':
470 case '3':
471 case '4':
472 case '5':
473 case '6':
474 case '7':
475 case '8':
476 case '9':
477 code += (*in - '0');
478 break;
479
480 case 'a':
481 case 'b':
482 case 'c':
483 case 'd':
484 case 'e':
485 case 'f':
486 code += (*in - 'a' + 10);
487 break;
488
489 case 'A':
490 case 'B':
491 case 'C':
492 case 'D':
493 case 'E':
494 case 'F':
495 code += (*in - 'A' + 10);
496 break;
497
498 // TODO(edisonn): spec does not say how to handle this error
499 default:
500 break;
501 }
502
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000503 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000504 out++;
505 in++;
506 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000507 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000508 out++;
509 in++;
510 }
511 }
512
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000513 if (hasOut) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000514 return in; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000515 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000516 // return where the string would end if we reuse the string
517 return start + (out - (const unsigned char*)NULL);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000518 }
519}
520
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000521static int readNameLength(const unsigned char* start, const unsigned char* end) {
522 return readName(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000523}
524
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000525static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
526 SkPdfNativeObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000527 if (!allocator) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000528 // TODO(edisonn): report error/warn/assert
edisonn@google.comb44334c2013-07-23 20:47:05 +0000529 return end;
530 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000531 int outLength = readNameLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000532 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000533 const unsigned char* now = readName(start, end, out);
534 SkPdfNativeObject::makeName(out, out + outLength, name);
535 //PUT_TRACK_STREAM(start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000536 TRACE_NAME(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000537 return now;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000538}
539
// TODO(edisonn): The PDF spec lets Length be an indirect object that is defined after the
// stream. That makes for an interesting scenario, where the stream data itself contains
// "endstream" together with an object that looks like the referenced length, while the real
// length object lives somewhere else. This could confuse the parser.
/* Example:

7 0 obj
<< /length 8 0 R>>
stream
...............
endstream
8 0 obj #we are in stream actually, not a real object
<< 10 >> #we are in stream actually, not a real object
endobj
endstream
8 0 obj #real obj
<< 100 >> #real obj
endobj
and it could get worse, with multiple objects like this
*/

// For now we implement the naive algorithm, which assumes that endstream terminates the
// stream.
562
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000563static const unsigned char* readStream(const unsigned char* start, const unsigned char* end,
564 SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000565 start = skipPdfWhiteSpaces(start, end);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000566 if (!( start[0] == 's' &&
567 start[1] == 't' &&
568 start[2] == 'r' &&
569 start[3] == 'e' &&
570 start[4] == 'a' &&
571 start[5] == 'm')) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000572 // no stream. return.
573 return start;
574 }
575
576 start += 6; // strlen("stream")
577 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
578 start += 2;
579 } else if (start[0] == kLF_PdfWhiteSpace) {
580 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000581 } else if (isPdfWhiteSpace(start[0])) {
582 start += 1;
583 } else {
584 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000585 }
586
587 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
588 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000589 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000590
591 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000592 if (stream->has_Length() && stream->Length(doc) > 0) {
593 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000594 }
595
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000596 // TODO(edisonn): load external streams
597 // TODO(edisonn): look at the last filter, to determine how to deal with possible parsing
598 // issues. The last filter can have special rules to terminate a stream, which we could
599 // use to determine end of stream.
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000600
601 if (length >= 0) {
602 const unsigned char* endstream = start + length;
603
604 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
605 endstream += 2;
606 } else if (endstream[0] == kLF_PdfWhiteSpace) {
607 endstream += 1;
608 }
609
610 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
611 length = -1;
612 }
613 }
614
edisonn@google.com571c70b2013-07-10 17:09:50 +0000615 if (length < 0) {
616 // scan the buffer, until we find first endstream
617 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000618 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end,
619 "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000620
621 if (endstream) {
622 length = endstream - start;
623 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000624 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000625 }
626 }
627 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000628 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000629
630 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
631 endstream += 2;
632 } else if (endstream[0] == kLF_PdfWhiteSpace) {
633 endstream += 1;
634 }
635
636 // TODO(edisonn): verify the next bytes are "endstream"
637
638 endstream += strlen("endstream");
639 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000640 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000641 return endstream;
642 }
643 return start;
644}
645
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000646static const unsigned char* readInlineImageStream(const unsigned char* start,
647 const unsigned char* end,
648 SkPdfImageDictionary* inlineImage,
649 SkPdfNativeDoc* doc) {
edisonn@google.com78b38b12013-07-15 18:20:58 +0000650 // We already processed ID keyword, and we should be positioned immediately after it
651
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000652 // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end,
653 // instead of this if.
654 //if (end - start <= 2) {
655 // // TODO(edisonn): warning?
656 // return end; // but can we have a pixel image encoded in 1-2 bytes?
657 //}
658
edisonn@google.com78b38b12013-07-15 18:20:58 +0000659 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
660 start += 2;
661 } else if (start[0] == kLF_PdfWhiteSpace) {
662 start += 1;
663 } else if (isPdfWhiteSpace(start[0])) {
664 start += 1;
665 } else {
666 SkASSERT(isPdfDelimiter(start[0]));
667 // TODO(edisonn): warning?
668 }
669
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000670 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
671 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000672
673 if (endstream) {
674 int length = endstream - start;
675 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
676 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
677 inlineImage->addStream(start, (size_t)length);
678 } else {
679 // TODO(edisonn): report error in inline image stream (ID-EI) section
680 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
681 return end;
682 }
683 return endEI;
684}
685
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000686static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end,
687 SkPdfNativeObject* dict,
688 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000689 if (allocator == NULL) {
690 // TODO(edisonn): report/warning error
691 return end;
692 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000693 SkPdfNativeObject::makeEmptyDictionary(dict);
694 // PUT_TRACK_STREAM(dict, start, start)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000695
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000696 start = skipPdfWhiteSpaces(start, end);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000697 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000698
699 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000700 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000701 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000702 start++;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000703 start = readName(start, end, &key, &tmpStorage);
704 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000705
706 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000707 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000708 start = nextObject(start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000709
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000710 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000711
712 if (start < end) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000713 // We should have an indirect reference
edisonn@google.com571c70b2013-07-10 17:09:50 +0000714 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000715 SkPdfNativeObject generation;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000716 start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000717
edisonn@google.com3aa35552013-08-14 18:26:20 +0000718 SkPdfNativeObject keywordR;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000719 start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000720
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000721 if (value->isInteger() && generation.isInteger() &&
722 keywordR.isKeywordReference()) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000723 int64_t id = value->intValue();
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000724 SkPdfNativeObject::resetAndMakeReference(
725 (unsigned int)id,
726 (unsigned int)generation.intValue(),
727 value);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000728 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000729 dict->set(&key, value);
730 } else {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000731 // TODO(edisonn) error?, ignore it for now.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000732 dict->set(&key, value);
733 }
734 } else {
735 // next elem is not a digit, but it might not be / either!
736 dict->set(&key, value);
737 }
738 } else {
739 // /key >>
740 dict->set(&key, value);
741 return end;
742 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000743 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000744 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000745 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000746 return end;
747 }
748 }
749
edisonn@google.com571c70b2013-07-10 17:09:50 +0000750 // now we should expect >>
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000751 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000752 if (*start != kClosedInequityBracket_PdfDelimiter) {
753 // TODO(edisonn): report/warning
754 }
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000755
edisonn@google.com78b38b12013-07-15 18:20:58 +0000756 start++; // skip >
757 if (*start != kClosedInequityBracket_PdfDelimiter) {
758 // TODO(edisonn): report/warning
759 }
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000760
edisonn@google.com78b38b12013-07-15 18:20:58 +0000761 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000762
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000763 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
edisonn@google.combca421b2013-09-05 20:00:21 +0000764
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000765 start = readStream(start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000766
767 return start;
768}
769
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000770const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
771 SkPdfNativeObject* token,
772 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000773 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000774
775 // skip white spaces
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000776 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000777
edisonn@google.comaf54a512013-09-13 19:33:42 +0000778 if (start >= end) {
779 return end;
780 }
781
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000782 current = endOfPdfToken(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000783
784 // no token, len would be 0
edisonn@google.comaf54a512013-09-13 19:33:42 +0000785 if (current == start || current == end) {
786 return end;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000787 }
788
789 int tokenLen = current - start;
790
791 if (tokenLen == 1) {
792 // start array
793 switch (*start) {
794 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000795 return readArray(current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000796
797 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000798 return readString(start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000799
800 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com571c70b2013-07-10 17:09:50 +0000801 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
802 // TODO(edisonn): pass here the length somehow?
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000803 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000804 } else {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000805 return readHexString(start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000806 }
807
808 case kNamed_PdfDelimiter:
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000809 return readName(start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000810
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000811 // TODO(edisonn): what to do curly brackets?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000812 case kOpenedCurlyBracket_PdfDelimiter:
813 default:
814 break;
815 }
816
817 SkASSERT(!isPdfWhiteSpace(*start));
818 if (isPdfDelimiter(*start)) {
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000819 // TODO(edisonn): how unexpected stream ] } > ) will be handled?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000820 // for now ignore, and it will become a keyword to be ignored
821 }
822 }
823
824 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000825 SkPdfNativeObject::makeNull(token);
826 // PUT_TRACK_STREAM(start, start + 4)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000827 return current;
828 }
829
830 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000831 SkPdfNativeObject::makeBoolean(true, token);
832 // PUT_TRACK_STREAM(start, start + 4)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000833 return current;
834 }
835
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000836 // TODO(edisonn): again, make all buffers have 5 extra bytes
837 if (tokenLen == 5 && start[0] == 'f' &&
838 start[1] == 'a' &&
839 start[2] == 'l' &&
840 start[3] == 's' &&
841 start[4] == 'e') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000842 SkPdfNativeObject::makeBoolean(false, token);
843 // PUT_TRACK_STREAM(start, start + 5)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000844 return current;
845 }
846
847 if (isPdfNumeric(*start)) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000848 SkPdfNativeObject::makeNumeric(start, current, token);
849 // PUT_TRACK_STREAM(start, current)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000850 } else {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000851 SkPdfNativeObject::makeKeyword(start, current, token);
852 // PUT_TRACK_STREAM(start, current)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000853 }
854 return current;
855}
856
edisonn@google.com3aa35552013-08-14 18:26:20 +0000857SkPdfNativeObject* SkPdfAllocator::allocBlock() {
858 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
859 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000860}
861
862SkPdfAllocator::~SkPdfAllocator() {
863 for (int i = 0 ; i < fHandles.count(); i++) {
864 free(fHandles[i]);
865 }
866 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000867 for (int j = 0 ; j < BUFFER_SIZE; j++) {
868 fHistory[i][j].reset();
869 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000870 delete[] fHistory[i];
871 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000872 for (int j = 0 ; j < BUFFER_SIZE; j++) {
873 fCurrent[j].reset();
874 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000875 delete[] fCurrent;
876}
877
edisonn@google.com3aa35552013-08-14 18:26:20 +0000878SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000879 if (fCurrentUsed >= BUFFER_SIZE) {
880 fHistory.push(fCurrent);
881 fCurrent = allocBlock();
882 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000883 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000884 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000885 fCurrentUsed++;
886 return &fCurrent[fCurrentUsed - 1];
887}
888
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000889// TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache the result,
890// so there is no need of a second pass
891SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
892 SkPdfAllocator* allocator,
893 SkPdfNativeDoc* doc)
894 : fDoc(doc)
895 , fAllocator(allocator)
896 , fUncompressedStream(NULL)
897 , fUncompressedStreamEnd(NULL)
898 , fEmpty(false)
899 , fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000900 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000901 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000902 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000903 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
904 // we need to do now for perf, and our generated pdfs do not have comments,
905 // but we need to remove this hack for pdfs in the wild
edisonn@google.com78b38b12013-07-15 18:20:58 +0000906 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000907 if (endobj) {
908 len = endobj - (char*)buffer + strlen("endobj");
909 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000910 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000911 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000912}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000913
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000914SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len,
915 SkPdfAllocator* allocator,
916 SkPdfNativeDoc* doc) : fDoc(doc)
917 , fAllocator(allocator)
918 , fEmpty(false)
919 , fHasPutBack(false) {
920 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
921 // we need to do now for perf, and our generated pdfs do not have comments,
922 // but we need to remove this hack for pdfs in the wild
edisonn@google.com78b38b12013-07-15 18:20:58 +0000923 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000924 if (endobj) {
925 len = endobj - (char*)buffer + strlen("endobj");
926 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000927 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000928 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000929}
930
931SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000932}
933
934bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com91ce6982013-08-05 20:45:40 +0000935#ifdef PDF_TRACE_READ_TOKEN
936 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000937#endif
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000938
edisonn@google.com571c70b2013-07-10 17:09:50 +0000939 token->fKeyword = NULL;
940 token->fObject = NULL;
941
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000942 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000943 if (fUncompressedStream >= fUncompressedStreamEnd) {
scroggo@google.com5092adc2013-12-02 20:22:31 +0000944 fEmpty = true;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000945 return false;
946 }
947
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000948 SkPdfNativeObject obj;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000949 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
950 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000951
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000952 // If it is a keyword, we will only get the pointer of the string.
edisonn@google.com3aa35552013-08-14 18:26:20 +0000953 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000954 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000955 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000956 token->fType = kKeyword_TokenType;
957 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000958 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000959 *pobj = obj;
960 token->fObject = pobj;
961 token->fType = kObject_TokenType;
962 }
963
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000964#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000965 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000966#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000967 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000968 printf("break;\n");
969 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000970#endif
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000971 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object",
972 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
973 token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000974#endif
975
976 return true;
977}
978
979void SkPdfNativeTokenizer::PutBack(PdfToken token) {
980 SkASSERT(!fHasPutBack);
981 fHasPutBack = true;
982 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000983#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000984 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object",
985 token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str() :
986 token.fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000987#endif
988}
989
scroggo@google.com5092adc2013-12-02 20:22:31 +0000990bool SkPdfNativeTokenizer::readToken(PdfToken* token, bool writeDiff) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000991 if (fHasPutBack) {
992 *token = fPutBack;
993 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000994#ifdef PDF_TRACE_READ_TOKEN
scroggo@google.com5092adc2013-12-02 20:22:31 +0000995 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object",
996 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
997 token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000998#endif
scroggo@google.com5092adc2013-12-02 20:22:31 +0000999 if (writeDiff) {
1000 SkPdfDiffEncoder::WriteToFile(token);
1001 }
edisonn@google.com571c70b2013-07-10 17:09:50 +00001002 return true;
1003 }
1004
1005 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001006#ifdef PDF_TRACE_READ_TOKEN
scroggo@google.com5092adc2013-12-02 20:22:31 +00001007 printf("EMPTY TOKENIZER\n");
edisonn@google.com571c70b2013-07-10 17:09:50 +00001008#endif
1009 return false;
1010 }
1011
scroggo@google.com5092adc2013-12-02 20:22:31 +00001012 const bool result = readTokenCore(token);
1013 if (result && writeDiff) {
1014 SkPdfDiffEncoder::WriteToFile(token);
1015 }
1016 return result;
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001017}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001018
1019#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1020
1021// keys
1022DECLARE_PDF_NAME(BitsPerComponent);
1023DECLARE_PDF_NAME(ColorSpace);
1024DECLARE_PDF_NAME(Decode);
1025DECLARE_PDF_NAME(DecodeParms);
1026DECLARE_PDF_NAME(Filter);
1027DECLARE_PDF_NAME(Height);
1028DECLARE_PDF_NAME(ImageMask);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001029DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abBreviations?
edisonn@google.com78b38b12013-07-15 18:20:58 +00001030DECLARE_PDF_NAME(Interpolate);
1031DECLARE_PDF_NAME(Width);
1032
1033// values
1034DECLARE_PDF_NAME(DeviceGray);
1035DECLARE_PDF_NAME(DeviceRGB);
1036DECLARE_PDF_NAME(DeviceCMYK);
1037DECLARE_PDF_NAME(Indexed);
1038DECLARE_PDF_NAME(ASCIIHexDecode);
1039DECLARE_PDF_NAME(ASCII85Decode);
1040DECLARE_PDF_NAME(LZWDecode);
1041DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1042DECLARE_PDF_NAME(RunLengthDecode);
1043DECLARE_PDF_NAME(CCITTFaxDecode);
1044DECLARE_PDF_NAME(DCTDecode);
1045
1046#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1047
1048
edisonn@google.com3aa35552013-08-14 18:26:20 +00001049static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001050 if (!key || !key->isName()) {
1051 return key;
1052 }
1053
1054 // TODO(edisonn): use autogenerated code!
1055 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1056 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1057 HANDLE_NAME_ABBR(key, Decode, D);
1058 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1059 HANDLE_NAME_ABBR(key, Filter, F);
1060 HANDLE_NAME_ABBR(key, Height, H);
1061 HANDLE_NAME_ABBR(key, ImageMask, IM);
1062// HANDLE_NAME_ABBR(key, Intent, );
1063 HANDLE_NAME_ABBR(key, Interpolate, I);
1064 HANDLE_NAME_ABBR(key, Width, W);
1065
1066 return key;
1067}
1068
edisonn@google.com3aa35552013-08-14 18:26:20 +00001069static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001070 if (!value || !value->isName()) {
1071 return value;
1072 }
1073
1074 // TODO(edisonn): use autogenerated code!
1075 HANDLE_NAME_ABBR(value, DeviceGray, G);
1076 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1077 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1078 HANDLE_NAME_ABBR(value, Indexed, I);
1079 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1080 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1081 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1082 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1083 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1084 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1085 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1086
1087 return value;
1088}
1089
1090SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1091 // BI already processed
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001092 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001093 if (fUncompressedStream >= fUncompressedStreamEnd) {
1094 return NULL;
1095 }
1096
1097 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001098 SkPdfNativeObject::makeEmptyDictionary(inlineImage);
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001099 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart,
1100 // fUncompressedStream - fUncompressedStreamStart)
edisonn@google.com78b38b12013-07-15 18:20:58 +00001101
1102 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001103 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001104 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key,
1105 fAllocator, fDoc);
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001106 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
edisonn@google.com78b38b12013-07-15 18:20:58 +00001107
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001108 if (key->isKeyword() && key->lenstr() == 2 &&
1109 key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
1110 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd,
1111 inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001112 return inlineImage;
1113 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001114 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001115 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj,
1116 fAllocator, fDoc);
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001117 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
edisonn@google.comc8fda9d2013-10-09 20:23:12 +00001118 // TODO(edisonn): perf maybe we should not expand abBreviation like this
edisonn@google.com78b38b12013-07-15 18:20:58 +00001119 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1120 inlineImageValueAbbreviationExpand(obj));
1121 }
1122 }
1123 // TODO(edisonn): report end of data with inline image without an EI
1124 return inlineImage;
1125}