blob: 64388552a51de46ac756b0feb241c5ad868a3e56 [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
8#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +00009#include "SkPdfNativeObject.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +000010#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000011
edisonn@google.com33f11b62013-08-14 21:35:27 +000012// TODO(edisonn): mac builder does not find the header ... but from headers is ok
13//#include "SkPdfStreamCommonDictionary_autogen.h"
14//#include "SkPdfImageDictionary_autogen.h"
15#include "SkPdfHeaders_autogen.h"
16
edisonn@google.com78b38b12013-07-15 18:20:58 +000017
18// TODO(edisonn): perf!!!
19// there could be 0s between start and end! but not in the needle.
20static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
21 int needleLen = strlen(needle);
22 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26
27 hayStart++;
28
29 while (hayStart < hayEnd) {
30 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
31 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
32 strncmp(hayStart, needle, needleLen) == 0) {
33 return hayStart;
34 }
35 hayStart++;
36 }
37 return NULL;
38}
39
edisonn@google.come2e01ff2013-08-02 20:24:48 +000040#ifdef PDF_TRACE_TOKENIZER
// Begins a new trace line: prints a running sequence number and the token type, followed by one
// space per nesting level. Type names are right-aligned in a fixed-width column so the traced
// content lines up.
static void TRACE_INDENT(int level, const char* type) {
    static int id = 0;  // counts every call, useful for breaking on a specific trace entry
    ++id;
#if 0
    if (478613 == id) {
        printf("break;\n");
    }
#endif
    printf("\n%10i %15s: ", id, type);
    for (int remaining = level; remaining > 0; --remaining) {
        printf(" ");
    }
}
edisonn@google.com3aac1f92013-07-02 22:42:53 +000055
// Echoes one character of skipped white space or comment text to the trace output.
static void TRACE_COMMENT(char ch) {
    const char traced = ch;
    printf("%c", traced);
}
59
// Echoes one character of the token being scanned to the trace output.
static void TRACE_TK(char ch) {
    const char traced = ch;
    printf("%c", traced);
}
63
// Prints the decoded name bytes in [start, end) to the trace output, followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
71
// Prints the decoded string bytes in [start, end) to the trace output, followed by a newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
79
// Prints the decoded hex-string bytes in [start, end) to the trace output, followed by a newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        printf("%c", *cursor);
    }
    printf("\n");
}
87
88#else
// Tracing is compiled out: every trace hook expands to nothing, so the arguments passed at the
// call sites are never evaluated.
#define TRACE_INDENT(level, type)
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start, end)
#define TRACE_STRING(start, end)
#define TRACE_HEXSTRING(start, end)
95#endif
96
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000097const unsigned char* skipPdfWhiteSpaces(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000098 TRACE_INDENT(level, "White Space");
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000099 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000100 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000101 if (*start == kComment_PdfDelimiter) {
102 // skip the comment until end of line
103 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000104 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000105 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000106 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107 }
108 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000109 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110 start++;
111 }
112 }
113 return start;
114}
115
116// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000117const unsigned char* endOfPdfToken(int level, const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000118 //int opened brackets
119 //TODO(edisonn): what out for special chars, like \n, \032
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000120 TRACE_INDENT(level, "Token");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121
122 SkASSERT(!isPdfWhiteSpace(*start));
123
124 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000125 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126 start++;
127 return start;
128 }
129
130 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000131 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132 start++;
133 }
134 return start;
135}
136
edisonn@google.com571c70b2013-07-10 17:09:50 +0000137// last elem has to be ]
edisonn@google.combca421b2013-09-05 20:00:21 +0000138static const unsigned char* readArray(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* array, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM) {
139 SkPdfNativeObject::makeEmptyArray(array PUT_TRACK_STREAM(start, start));
140
edisonn@google.com1f080162013-07-23 21:05:49 +0000141 if (allocator == NULL) {
142 // TODO(edisonn): report/warning error
143 return end;
144 }
145
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000146 TRACE_INDENT(level, "Array");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000147 while (start < end) {
148 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000149 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000150
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000151 const unsigned char* endOfToken = endOfPdfToken(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000152
153 if (endOfToken == start) {
154 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
155 return start;
156 }
157
158 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
159 return endOfToken;
160 }
161
edisonn@google.com3aa35552013-08-14 18:26:20 +0000162 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000163 start = nextObject(level + 1, start, end, newObj, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000164 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
165 // we are sure they are not references!
166 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000167 SkPdfNativeObject* gen = array->removeLastInArray();
168 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.combca421b2013-09-05 20:00:21 +0000169
170 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj));
171
edisonn@google.com571c70b2013-07-10 17:09:50 +0000172 }
173 array->appendInArray(newObj);
174 }
175 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000176 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
177 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000178 return start;
179}
180
181// When we read strings we will rewrite the string so we will reuse the memory
182// when we start to read the string, we already consumed the opened bracket
edisonn@google.com571c70b2013-07-10 17:09:50 +0000183
// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer, or can reuse the one we have
185
186static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
187 TRACE_INDENT(level, "String");
188 const unsigned char* in = start;
189 bool hasOut = (out != NULL);
190
191 int openRoundBrackets = 1;
192 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000193 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
194 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000195 if (openRoundBrackets == 0) {
196 in++; // consumed )
197 break;
198 }
199
edisonn@google.com571c70b2013-07-10 17:09:50 +0000200 if (*in == kEscape_PdfSpecial) {
201 if (in + 1 < end) {
202 switch (in[1]) {
203 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000204 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000205 out++;
206 in += 2;
207 break;
208
209 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000210 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000211 out++;
212 in += 2;
213 break;
214
215 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000216 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000217 out++;
218 in += 2;
219 break;
220
221 case 'b':
222 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000223 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000224 out++;
225 in += 2;
226 break;
227
228 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000229 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000230 out++;
231 in += 2;
232 break;
233
234 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000235 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000236 out++;
237 in += 2;
238 break;
239
240 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000241 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000242 out++;
243 in += 2;
244 break;
245
246 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000247 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000248 out++;
249 in += 2;
250 break;
251
252 case '0':
253 case '1':
254 case '2':
255 case '3':
256 case '4':
257 case '5':
258 case '6':
259 case '7': {
260 //read octals
261 in++; // consume backslash
262
263 int code = 0;
264 int i = 0;
265 while (in < end && *in >= '0' && *in < '8') {
266 code = (code << 3) + ((*in) - '0'); // code * 8 + d
267 i++;
268 in++;
269 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000270 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000271 out++;
272 i = 0;
273 }
274 }
275 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000276 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000277 out++;
278 }
279 }
280 break;
281
282 default:
283 // Per spec, backslash is ignored is escaped ch is unknown
284 in++;
285 break;
286 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000287 } else {
288 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000289 }
290 } else {
291 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
292 // we could have one look that first just inc current, and when we find the backslash
293 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000294 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000295 in++;
296 out++;
297 }
298 }
299
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000300 if (hasOut) {
301 return in; // consumed already ) at the end of the string
302 } else {
303 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
304 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000305}
306
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000307static int readStringLength(int level, const unsigned char* start, const unsigned char* end) {
308 return readString(level, start, end, NULL) - start;
309}
310
edisonn@google.combca421b2013-09-05 20:00:21 +0000311static const unsigned char* readString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator GET_TRACK_STREAM) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000312 if (!allocator) {
313 return end;
314 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000315 int outLength = readStringLength(level, start, end);
316 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
317 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000318 const unsigned char* now = readString(level, start, end, out);
319 SkPdfNativeObject::makeString(out, out + outLength, str PUT_TRACK_STREAM(start, now));
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000320 TRACE_STRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000321 return now; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000322}
323
324static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
325 TRACE_INDENT(level, "HexString");
326 bool hasOut = (out != NULL);
327 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000328
329 unsigned char code = 0;
330
331 while (in < end) {
332 while (in < end && isPdfWhiteSpace(*in)) {
333 in++;
334 }
335
336 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000337 //*in = '\0';
338 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000339 // normal exit
340 break;
341 }
342
343 if (in >= end) {
344 // end too soon
345 break;
346 }
347
348 switch (*in) {
349 case '0':
350 case '1':
351 case '2':
352 case '3':
353 case '4':
354 case '5':
355 case '6':
356 case '7':
357 case '8':
358 case '9':
359 code = (*in - '0') << 4;
360 break;
361
362 case 'a':
363 case 'b':
364 case 'c':
365 case 'd':
366 case 'e':
367 case 'f':
368 code = (*in - 'a' + 10) << 4;
369 break;
370
371 case 'A':
372 case 'B':
373 case 'C':
374 case 'D':
375 case 'E':
376 case 'F':
377 code = (*in - 'A' + 10) << 4;
378 break;
379
380 // TODO(edisonn): spec does not say how to handle this error
381 default:
382 break;
383 }
384
385 in++; // advance
386
387 while (in < end && isPdfWhiteSpace(*in)) {
388 in++;
389 }
390
391 // TODO(edisonn): report error
392 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000393 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000394 out++;
395 break;
396 }
397
398 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000399 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000400 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000401 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000402 break;
403 }
404
405 switch (*in) {
406 case '0':
407 case '1':
408 case '2':
409 case '3':
410 case '4':
411 case '5':
412 case '6':
413 case '7':
414 case '8':
415 case '9':
416 code += (*in - '0');
417 break;
418
419 case 'a':
420 case 'b':
421 case 'c':
422 case 'd':
423 case 'e':
424 case 'f':
425 code += (*in - 'a' + 10);
426 break;
427
428 case 'A':
429 case 'B':
430 case 'C':
431 case 'D':
432 case 'E':
433 case 'F':
434 code += (*in - 'A' + 10);
435 break;
436
437 // TODO(edisonn): spec does not say how to handle this error
438 default:
439 break;
440 }
441
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000442 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000443 out++;
444 in++;
445 }
446
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000447 if (hasOut) {
448 return in; // consumed already > at the end of the string
449 } else {
450 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000451 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000452}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000453
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000454static int readHexStringLength(int level, const unsigned char* start, const unsigned char* end) {
455 return readHexString(level, start, end, NULL) - start;
456}
457
edisonn@google.combca421b2013-09-05 20:00:21 +0000458static const unsigned char* readHexString(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator GET_TRACK_STREAM) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000459 if (!allocator) {
460 return end;
461 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000462 int outLength = readHexStringLength(level, start, end);
463 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
464 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000465 const unsigned char* now = readHexString(level, start, end, out);
466 SkPdfNativeObject::makeHexString(out, out + outLength, str PUT_TRACK_STREAM(start, now));
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000467 TRACE_HEXSTRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000468 return now; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000469}
470
471// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000472static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, unsigned char* out) {
473 TRACE_INDENT(level, "Name");
474 bool hasOut = (out != NULL);
475 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000476
477 unsigned char code = 0;
478
479 while (in < end) {
480 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
481 break;
482 }
483
484 if (*in == '#' && in + 2 < end) {
485 in++;
486 switch (*in) {
487 case '0':
488 case '1':
489 case '2':
490 case '3':
491 case '4':
492 case '5':
493 case '6':
494 case '7':
495 case '8':
496 case '9':
497 code = (*in - '0') << 4;
498 break;
499
500 case 'a':
501 case 'b':
502 case 'c':
503 case 'd':
504 case 'e':
505 case 'f':
506 code = (*in - 'a' + 10) << 4;
507 break;
508
509 case 'A':
510 case 'B':
511 case 'C':
512 case 'D':
513 case 'E':
514 case 'F':
515 code = (*in - 'A' + 10) << 4;
516 break;
517
518 // TODO(edisonn): spec does not say how to handle this error
519 default:
520 break;
521 }
522
523 in++; // advance
524
525 switch (*in) {
526 case '0':
527 case '1':
528 case '2':
529 case '3':
530 case '4':
531 case '5':
532 case '6':
533 case '7':
534 case '8':
535 case '9':
536 code += (*in - '0');
537 break;
538
539 case 'a':
540 case 'b':
541 case 'c':
542 case 'd':
543 case 'e':
544 case 'f':
545 code += (*in - 'a' + 10);
546 break;
547
548 case 'A':
549 case 'B':
550 case 'C':
551 case 'D':
552 case 'E':
553 case 'F':
554 code += (*in - 'A' + 10);
555 break;
556
557 // TODO(edisonn): spec does not say how to handle this error
558 default:
559 break;
560 }
561
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000562 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000563 out++;
564 in++;
565 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000566 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000567 out++;
568 in++;
569 }
570 }
571
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000572 if (hasOut) {
573 return in;
574 } else {
575 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
576 }
577}
578
579static int readNameLength(int level, const unsigned char* start, const unsigned char* end) {
580 return readName(level, start, end, NULL) - start;
581}
582
edisonn@google.combca421b2013-09-05 20:00:21 +0000583static const unsigned char* readName(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* name, SkPdfAllocator* allocator GET_TRACK_STREAM) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000584 if (!allocator) {
585 return end;
586 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000587 int outLength = readNameLength(level, start, end);
588 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
589 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000590 const unsigned char* now = readName(level, start, end, out);
591 SkPdfNativeObject::makeName(out, out + outLength, name PUT_TRACK_STREAM(start, now));
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000592 TRACE_NAME(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000593 return now;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000594}
595
// TODO(edisonn): the PDF spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains "endstream", together
// with an object that looks like the referenced length, while the real length object is
// somewhere else. It could confuse the parser.
600/*example:
601
6027 0 obj
603<< /length 8 0 R>>
604stream
605...............
606endstream
6078 0 obj #we are in stream actually, not a real object
608<< 10 >> #we are in stream actually, not a real object
609endobj
610endstream
6118 0 obj #real obj
612<< 100 >> #real obj
613endobj
614and it could get worse, with multiple object like this
615*/
616
617// right now implement the silly algorithm that assumes endstream is finishing the stream
618
619
edisonn@google.com3aa35552013-08-14 18:26:20 +0000620static const unsigned char* readStream(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000621 TRACE_INDENT(level, "Stream");
622 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000623 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
624 // no stream. return.
625 return start;
626 }
627
628 start += 6; // strlen("stream")
629 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
630 start += 2;
631 } else if (start[0] == kLF_PdfWhiteSpace) {
632 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000633 } else if (isPdfWhiteSpace(start[0])) {
634 start += 1;
635 } else {
636 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
637 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000638 }
639
640 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
641 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000642 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000643
644 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000645 if (stream->has_Length() && stream->Length(doc) > 0) {
646 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000647 }
648
649 // TODO(edisonn): laod external streams
650 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
651
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000652
653 if (length >= 0) {
654 const unsigned char* endstream = start + length;
655
656 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
657 endstream += 2;
658 } else if (endstream[0] == kLF_PdfWhiteSpace) {
659 endstream += 1;
660 }
661
662 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
663 length = -1;
664 }
665 }
666
edisonn@google.com571c70b2013-07-10 17:09:50 +0000667 if (length < 0) {
668 // scan the buffer, until we find first endstream
669 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000670 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000671
672 if (endstream) {
673 length = endstream - start;
674 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000675 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000676 }
677 }
678 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000679 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000680
681 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
682 endstream += 2;
683 } else if (endstream[0] == kLF_PdfWhiteSpace) {
684 endstream += 1;
685 }
686
687 // TODO(edisonn): verify the next bytes are "endstream"
688
689 endstream += strlen("endstream");
690 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000691 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000692 return endstream;
693 }
694 return start;
695}
696
edisonn@google.com3aa35552013-08-14 18:26:20 +0000697static const unsigned char* readInlineImageStream(int level, const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000698 TRACE_INDENT(level, "Inline Image");
edisonn@google.com78b38b12013-07-15 18:20:58 +0000699 // We already processed ID keyword, and we should be positioned immediately after it
700
701 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
702 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
703 start += 2;
704 } else if (start[0] == kLF_PdfWhiteSpace) {
705 start += 1;
706 } else if (isPdfWhiteSpace(start[0])) {
707 start += 1;
708 } else {
709 SkASSERT(isPdfDelimiter(start[0]));
710 // TODO(edisonn): warning?
711 }
712
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000713 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
714 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000715
716 if (endstream) {
717 int length = endstream - start;
718 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
719 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
720 inlineImage->addStream(start, (size_t)length);
721 } else {
722 // TODO(edisonn): report error in inline image stream (ID-EI) section
723 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
724 return end;
725 }
726 return endEI;
727}
728
edisonn@google.combca421b2013-09-05 20:00:21 +0000729static const unsigned char* readDictionary(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000730 if (allocator == NULL) {
731 // TODO(edisonn): report/warning error
732 return end;
733 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000734 TRACE_INDENT(level, "Dictionary");
edisonn@google.combca421b2013-09-05 20:00:21 +0000735 SkPdfNativeObject::makeEmptyDictionary(dict PUT_TRACK_STREAM(start, start));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000736
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000737 start = skipPdfWhiteSpaces(level, start, end);
738 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000739
740 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000741 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000742 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000743 start++;
edisonn@google.combca421b2013-09-05 20:00:21 +0000744 start = readName(level + 1, start, end, &key, &tmpStorage PUT_TRACK_STREAM_ARGS);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000745 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000746
747 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000748 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000749 start = nextObject(level + 1, start, end, value, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000750
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000751 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000752
753 if (start < end) {
754 // seems we have an indirect reference
755 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000756 SkPdfNativeObject generation;
edisonn@google.combca421b2013-09-05 20:00:21 +0000757 start = nextObject(level + 1, start, end, &generation, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000758
edisonn@google.com3aa35552013-08-14 18:26:20 +0000759 SkPdfNativeObject keywordR;
edisonn@google.combca421b2013-09-05 20:00:21 +0000760 start = nextObject(level + 1, start, end, &keywordR, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000761
762 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
763 int64_t id = value->intValue();
edisonn@google.combca421b2013-09-05 20:00:21 +0000764 SkPdfNativeObject::resetAndMakeReference((unsigned int)id, (unsigned int)generation.intValue(), value PUT_TRACK_PARAMETERS_OBJ2(value, &generation));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000765 dict->set(&key, value);
766 } else {
767 // error, ignore
768 dict->set(&key, value);
769 }
770 } else {
771 // next elem is not a digit, but it might not be / either!
772 dict->set(&key, value);
773 }
774 } else {
775 // /key >>
776 dict->set(&key, value);
777 return end;
778 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000779 start = skipPdfWhiteSpaces(level + 1, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000780 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000781 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000782 return end;
783 }
784 }
785
786 // TODO(edisonn): options to ignore these errors
787
788 // now we should expect >>
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000789 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000790 if (*start != kClosedInequityBracket_PdfDelimiter) {
791 // TODO(edisonn): report/warning
792 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000793 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000794 start++; // skip >
795 if (*start != kClosedInequityBracket_PdfDelimiter) {
796 // TODO(edisonn): report/warning
797 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000798 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000799 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000800
edisonn@google.combca421b2013-09-05 20:00:21 +0000801 STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
802
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000803 start = readStream(level, start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000804
805 return start;
806}
807
edisonn@google.combca421b2013-09-05 20:00:21 +0000808const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000809 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000810
811 // skip white spaces
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000812 start = skipPdfWhiteSpaces(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000813
edisonn@google.comaf54a512013-09-13 19:33:42 +0000814 if (start >= end) {
815 return end;
816 }
817
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000818 current = endOfPdfToken(level, start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000819
820 // no token, len would be 0
edisonn@google.comaf54a512013-09-13 19:33:42 +0000821 if (current == start || current == end) {
822 return end;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000823 }
824
825 int tokenLen = current - start;
826
827 if (tokenLen == 1) {
828 // start array
829 switch (*start) {
830 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000831 //*start = '\0';
edisonn@google.combca421b2013-09-05 20:00:21 +0000832 return readArray(level + 1, current, end, token, allocator, doc PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000833
834 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000835 //*start = '\0';
edisonn@google.combca421b2013-09-05 20:00:21 +0000836 return readString(level, start + 1, end, token, allocator PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000837
838 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000839 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000840 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000841 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000842 // TODO(edisonn): pass here the length somehow?
edisonn@google.combca421b2013-09-05 20:00:21 +0000843 return readDictionary(level + 1, start + 2, end, token, allocator, doc PUT_TRACK_STREAM_ARGS); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000844 } else {
edisonn@google.combca421b2013-09-05 20:00:21 +0000845 return readHexString(level, start + 1, end, token, allocator PUT_TRACK_STREAM_ARGS); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000846 }
847
848 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000849 //*start = '\0';
edisonn@google.combca421b2013-09-05 20:00:21 +0000850 return readName(level, start + 1, end, token, allocator PUT_TRACK_STREAM_ARGS);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000851
852 // TODO(edisonn): what to do curly brackets? read spec!
853 case kOpenedCurlyBracket_PdfDelimiter:
854 default:
855 break;
856 }
857
858 SkASSERT(!isPdfWhiteSpace(*start));
859 if (isPdfDelimiter(*start)) {
860 // TODO(edisonn): how stream ] } > ) will be handled?
861 // for now ignore, and it will become a keyword to be ignored
862 }
863 }
864
865 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.combca421b2013-09-05 20:00:21 +0000866 SkPdfNativeObject::makeNull(token PUT_TRACK_STREAM(start, start + 4));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000867 return current;
868 }
869
870 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.combca421b2013-09-05 20:00:21 +0000871 SkPdfNativeObject::makeBoolean(true, token PUT_TRACK_STREAM(start, start + 4));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000872 return current;
873 }
874
edisonn@google.comf111a4b2013-07-31 18:22:36 +0000875 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.combca421b2013-09-05 20:00:21 +0000876 SkPdfNativeObject::makeBoolean(false, token PUT_TRACK_STREAM(start, start + 5));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000877 return current;
878 }
879
880 if (isPdfNumeric(*start)) {
edisonn@google.combca421b2013-09-05 20:00:21 +0000881 SkPdfNativeObject::makeNumeric(start, current, token PUT_TRACK_STREAM(start, current));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000882 } else {
edisonn@google.combca421b2013-09-05 20:00:21 +0000883 SkPdfNativeObject::makeKeyword(start, current, token PUT_TRACK_STREAM(start, current));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000884 }
885 return current;
886}
887
edisonn@google.com3aa35552013-08-14 18:26:20 +0000888SkPdfNativeObject* SkPdfAllocator::allocBlock() {
889 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
890 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000891}
892
893SkPdfAllocator::~SkPdfAllocator() {
894 for (int i = 0 ; i < fHandles.count(); i++) {
895 free(fHandles[i]);
896 }
897 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000898 for (int j = 0 ; j < BUFFER_SIZE; j++) {
899 fHistory[i][j].reset();
900 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000901 delete[] fHistory[i];
902 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000903 for (int j = 0 ; j < BUFFER_SIZE; j++) {
904 fCurrent[j].reset();
905 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000906 delete[] fCurrent;
907}
908
edisonn@google.com3aa35552013-08-14 18:26:20 +0000909SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000910 if (fCurrentUsed >= BUFFER_SIZE) {
911 fHistory.push(fCurrent);
912 fCurrent = allocBlock();
913 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000914 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000915 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000916 fCurrentUsed++;
917 return &fCurrent[fCurrentUsed - 1];
918}
919
920// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com33f11b62013-08-14 21:35:27 +0000921SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000922 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000923 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000924 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000925 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000926 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000927 if (endobj) {
928 len = endobj - (char*)buffer + strlen("endobj");
929 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000930 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000931 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000932}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000933
edisonn@google.com33f11b62013-08-14 21:35:27 +0000934SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000935 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000936 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000937 if (endobj) {
938 len = endobj - (char*)buffer + strlen("endobj");
939 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000940 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000941 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000942}
943
944SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000945}
946
947bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000948 SkPdfNativeObject obj;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000949#ifdef PDF_TRACE_READ_TOKEN
950 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000951#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000952 token->fKeyword = NULL;
953 token->fObject = NULL;
954
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000955 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000956 if (fUncompressedStream >= fUncompressedStreamEnd) {
957 return false;
958 }
959
edisonn@google.combca421b2013-09-05 20:00:21 +0000960 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000961
962 // If it is a keyword, we will only get the pointer of the string
edisonn@google.com3aa35552013-08-14 18:26:20 +0000963 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000964 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000965 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000966 token->fType = kKeyword_TokenType;
967 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000968 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000969 *pobj = obj;
970 token->fObject = pobj;
971 token->fType = kObject_TokenType;
972 }
973
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000974#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000975 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000976#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000977 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000978 printf("break;\n");
979 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000980#endif
edisonn@google.come91260c2013-09-04 17:29:06 +0000981 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000982#endif
983
984 return true;
985}
986
987void SkPdfNativeTokenizer::PutBack(PdfToken token) {
988 SkASSERT(!fHasPutBack);
989 fHasPutBack = true;
990 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000991#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.come91260c2013-09-04 17:29:06 +0000992 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000993#endif
994}
995
996bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
997 if (fHasPutBack) {
998 *token = fPutBack;
999 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001000#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.come91260c2013-09-04 17:29:06 +00001001 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +00001002#endif
1003 return true;
1004 }
1005
1006 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001007#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +00001008 printf("EMPTY TOKENIZER\n");
1009#endif
1010 return false;
1011 }
1012
1013 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001014}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001015
1016#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1017
1018// keys
1019DECLARE_PDF_NAME(BitsPerComponent);
1020DECLARE_PDF_NAME(ColorSpace);
1021DECLARE_PDF_NAME(Decode);
1022DECLARE_PDF_NAME(DecodeParms);
1023DECLARE_PDF_NAME(Filter);
1024DECLARE_PDF_NAME(Height);
1025DECLARE_PDF_NAME(ImageMask);
1026DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
1027DECLARE_PDF_NAME(Interpolate);
1028DECLARE_PDF_NAME(Width);
1029
1030// values
1031DECLARE_PDF_NAME(DeviceGray);
1032DECLARE_PDF_NAME(DeviceRGB);
1033DECLARE_PDF_NAME(DeviceCMYK);
1034DECLARE_PDF_NAME(Indexed);
1035DECLARE_PDF_NAME(ASCIIHexDecode);
1036DECLARE_PDF_NAME(ASCII85Decode);
1037DECLARE_PDF_NAME(LZWDecode);
1038DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1039DECLARE_PDF_NAME(RunLengthDecode);
1040DECLARE_PDF_NAME(CCITTFaxDecode);
1041DECLARE_PDF_NAME(DCTDecode);
1042
1043#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1044
1045
edisonn@google.com3aa35552013-08-14 18:26:20 +00001046static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001047 if (!key || !key->isName()) {
1048 return key;
1049 }
1050
1051 // TODO(edisonn): use autogenerated code!
1052 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1053 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1054 HANDLE_NAME_ABBR(key, Decode, D);
1055 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1056 HANDLE_NAME_ABBR(key, Filter, F);
1057 HANDLE_NAME_ABBR(key, Height, H);
1058 HANDLE_NAME_ABBR(key, ImageMask, IM);
1059// HANDLE_NAME_ABBR(key, Intent, );
1060 HANDLE_NAME_ABBR(key, Interpolate, I);
1061 HANDLE_NAME_ABBR(key, Width, W);
1062
1063 return key;
1064}
1065
edisonn@google.com3aa35552013-08-14 18:26:20 +00001066static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001067 if (!value || !value->isName()) {
1068 return value;
1069 }
1070
1071 // TODO(edisonn): use autogenerated code!
1072 HANDLE_NAME_ABBR(value, DeviceGray, G);
1073 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1074 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1075 HANDLE_NAME_ABBR(value, Indexed, I);
1076 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1077 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1078 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1079 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1080 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1081 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1082 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1083
1084 return value;
1085}
1086
1087SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1088 // BI already processed
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001089 fUncompressedStream = skipPdfWhiteSpaces(0, fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001090 if (fUncompressedStream >= fUncompressedStreamEnd) {
1091 return NULL;
1092 }
1093
1094 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +00001095 SkPdfNativeObject::makeEmptyDictionary(inlineImage PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart, fUncompressedStream - fUncompressedStreamStart));
edisonn@google.com78b38b12013-07-15 18:20:58 +00001096
1097 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001098 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +00001099 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart));
edisonn@google.com78b38b12013-07-15 18:20:58 +00001100
edisonn@google.come878e722013-07-29 19:10:58 +00001101 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com2ccc3af2013-07-23 17:43:18 +00001102 fUncompressedStream = readInlineImageStream(0, fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001103 return inlineImage;
1104 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001105 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +00001106 fUncompressedStream = nextObject(0, fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart));
edisonn@google.com78b38b12013-07-15 18:20:58 +00001107 // TODO(edisonn): perf maybe we should not expand abreviation like this
1108 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1109 inlineImageValueAbbreviationExpand(obj));
1110 }
1111 }
1112 // TODO(edisonn): report end of data with inline image without an EI
1113 return inlineImage;
1114}