blob: aa8dae8bbb4c79c6b14c2ba7dbc5f1e8505ecdba [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
8#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +00009#include "SkPdfNativeObject.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +000010#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000011
edisonn@google.com33f11b62013-08-14 21:35:27 +000012// TODO(edisonn): mac builder does not find the header ... but from headers is ok
13//#include "SkPdfStreamCommonDictionary_autogen.h"
14//#include "SkPdfImageDictionary_autogen.h"
15#include "SkPdfHeaders_autogen.h"
16
edisonn@google.com78b38b12013-07-15 18:20:58 +000017
18// TODO(edisonn): perf!!!
19// there could be 0s between start and end! but not in the needle.
20static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
21 int needleLen = strlen(needle);
22 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
23 strncmp(hayStart, needle, needleLen) == 0) {
24 return hayStart;
25 }
26
27 hayStart++;
28
29 while (hayStart < hayEnd) {
30 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
31 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
32 strncmp(hayStart, needle, needleLen) == 0) {
33 return hayStart;
34 }
35 hayStart++;
36 }
37 return NULL;
38}
39
#ifdef PDF_TRACE_TOKENIZER

// Tracing helpers: echo what the tokenizer consumes to stdout.
// When PDF_TRACE_TOKENIZER is not defined they are replaced by empty macros below.

// Echoes one character consumed while skipping white space / comments.
static void TRACE_COMMENT(char ch) {
    putchar(ch);
}

// Echoes one character consumed as part of a token.
static void TRACE_TK(char ch) {
    putchar(ch);
}

// Prints the bytes of a decoded name, [start, end), followed by a newline.
static void TRACE_NAME(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        putchar(*cursor);
    }
    putchar('\n');
}

// Prints the bytes of a decoded literal string, [start, end), followed by a newline.
static void TRACE_STRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        putchar(*cursor);
    }
    putchar('\n');
}

// Prints the bytes of a decoded hex string, [start, end), followed by a newline.
static void TRACE_HEXSTRING(const unsigned char* start, const unsigned char* end) {
    for (const unsigned char* cursor = start; cursor < end; ++cursor) {
        putchar(*cursor);
    }
    putchar('\n');
}

#else
// Tracing disabled: every trace call expands to nothing.
#define TRACE_COMMENT(ch)
#define TRACE_TK(ch)
#define TRACE_NAME(start,end)
#define TRACE_STRING(start,end)
#define TRACE_HEXSTRING(start,end)
#endif
81
edisonn@google.com598cf5d2013-10-09 15:13:19 +000082const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +000083 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000084 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000085 if (*start == kComment_PdfDelimiter) {
86 // skip the comment until end of line
87 while (start < end && !isPdfEOL(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000088 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000089 start++;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000090 TRACE_COMMENT(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +000091 }
92 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000093 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +000094 start++;
95 }
96 }
97 return start;
98}
99
100// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000101const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000102 SkASSERT(!isPdfWhiteSpace(*start));
103
104 if (start < end && isPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000105 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000106 start++;
107 return start;
108 }
109
110 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000111 TRACE_TK(*start);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000112 start++;
113 }
114 return start;
115}
116
edisonn@google.com571c70b2013-07-10 17:09:50 +0000117// last elem has to be ]
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000118static const unsigned char* readArray(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* array, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
119 SkPdfNativeObject::makeEmptyArray(array);
120 // PUT_TRACK_STREAM(array, start, start)
edisonn@google.combca421b2013-09-05 20:00:21 +0000121
edisonn@google.com1f080162013-07-23 21:05:49 +0000122 if (allocator == NULL) {
123 // TODO(edisonn): report/warning error
124 return end;
125 }
126
edisonn@google.com571c70b2013-07-10 17:09:50 +0000127 while (start < end) {
128 // skip white spaces
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000129 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000130
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000131 const unsigned char* endOfToken = endOfPdfToken(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132
133 if (endOfToken == start) {
134 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
135 return start;
136 }
137
138 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
139 return endOfToken;
140 }
141
edisonn@google.com3aa35552013-08-14 18:26:20 +0000142 SkPdfNativeObject* newObj = allocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000143 start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000144 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
145 // we are sure they are not references!
146 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000147 SkPdfNativeObject* gen = array->removeLastInArray();
148 SkPdfNativeObject* id = array->removeLastInArray();
edisonn@google.combca421b2013-09-05 20:00:21 +0000149
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000150 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
151 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now
edisonn@google.combca421b2013-09-05 20:00:21 +0000152
edisonn@google.com571c70b2013-07-10 17:09:50 +0000153 }
154 array->appendInArray(newObj);
155 }
156 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +0000157 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
158 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +0000159 return start;
160}
161
162// When we read strings we will rewrite the string so we will reuse the memory
163// when we start to read the string, we already consumed the opened bracket
edisonn@google.com571c70b2013-07-10 17:09:50 +0000164
// TODO(edisonn): space: add a parameter that would report whether we need to allocate a new buffer, or whether we can reuse the one we have
166
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000167static const unsigned char* readString(const unsigned char* start, const unsigned char* end, unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000168 const unsigned char* in = start;
169 bool hasOut = (out != NULL);
170
171 int openRoundBrackets = 1;
172 while (in < end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000173 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
174 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000175 if (openRoundBrackets == 0) {
176 in++; // consumed )
177 break;
178 }
179
edisonn@google.com571c70b2013-07-10 17:09:50 +0000180 if (*in == kEscape_PdfSpecial) {
181 if (in + 1 < end) {
182 switch (in[1]) {
183 case 'n':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000184 if (hasOut) { *out = kLF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000185 out++;
186 in += 2;
187 break;
188
189 case 'r':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000190 if (hasOut) { *out = kCR_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000191 out++;
192 in += 2;
193 break;
194
195 case 't':
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000196 if (hasOut) { *out = kHT_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000197 out++;
198 in += 2;
199 break;
200
201 case 'b':
202 // TODO(edisonn): any special meaning to backspace?
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000203 if (hasOut) { *out = kBackspace_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000204 out++;
205 in += 2;
206 break;
207
208 case 'f':
edisonn@google.com1f080162013-07-23 21:05:49 +0000209 if (hasOut) { *out = kFF_PdfWhiteSpace; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000210 out++;
211 in += 2;
212 break;
213
214 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000215 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000216 out++;
217 in += 2;
218 break;
219
220 case kClosedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000221 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000222 out++;
223 in += 2;
224 break;
225
226 case kEscape_PdfSpecial:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000227 if (hasOut) { *out = kEscape_PdfSpecial; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000228 out++;
229 in += 2;
230 break;
231
232 case '0':
233 case '1':
234 case '2':
235 case '3':
236 case '4':
237 case '5':
238 case '6':
239 case '7': {
240 //read octals
241 in++; // consume backslash
242
243 int code = 0;
244 int i = 0;
245 while (in < end && *in >= '0' && *in < '8') {
246 code = (code << 3) + ((*in) - '0'); // code * 8 + d
247 i++;
248 in++;
249 if (i == 3) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000250 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000251 out++;
252 i = 0;
253 }
254 }
255 if (i > 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000256 if (hasOut) { *out = code & 0xff; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000257 out++;
258 }
259 }
260 break;
261
262 default:
263 // Per spec, backslash is ignored is escaped ch is unknown
264 in++;
265 break;
266 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000267 } else {
268 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000269 }
270 } else {
271 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
272 // we could have one look that first just inc current, and when we find the backslash
273 // we go to this loop
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000274 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000275 in++;
276 out++;
277 }
278 }
279
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000280 if (hasOut) {
281 return in; // consumed already ) at the end of the string
282 } else {
283 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
284 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000285}
286
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000287static int readStringLength(const unsigned char* start, const unsigned char* end) {
288 return readString(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000289}
290
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000291static const unsigned char* readString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000292 if (!allocator) {
293 return end;
294 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000295 int outLength = readStringLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000296 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
297 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000298 const unsigned char* now = readString(start, end, out);
299 SkPdfNativeObject::makeString(out, out + outLength, str);
300 // PUT_TRACK_STREAM(str, start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000301 TRACE_STRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000302 return now; // consumed already ) at the end of the string
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000303}
304
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000305static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000306 bool hasOut = (out != NULL);
307 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000308
309 unsigned char code = 0;
310
311 while (in < end) {
312 while (in < end && isPdfWhiteSpace(*in)) {
313 in++;
314 }
315
316 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000317 //*in = '\0';
318 in++; // consume >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000319 // normal exit
320 break;
321 }
322
323 if (in >= end) {
324 // end too soon
325 break;
326 }
327
328 switch (*in) {
329 case '0':
330 case '1':
331 case '2':
332 case '3':
333 case '4':
334 case '5':
335 case '6':
336 case '7':
337 case '8':
338 case '9':
339 code = (*in - '0') << 4;
340 break;
341
342 case 'a':
343 case 'b':
344 case 'c':
345 case 'd':
346 case 'e':
347 case 'f':
348 code = (*in - 'a' + 10) << 4;
349 break;
350
351 case 'A':
352 case 'B':
353 case 'C':
354 case 'D':
355 case 'E':
356 case 'F':
357 code = (*in - 'A' + 10) << 4;
358 break;
359
360 // TODO(edisonn): spec does not say how to handle this error
361 default:
362 break;
363 }
364
365 in++; // advance
366
367 while (in < end && isPdfWhiteSpace(*in)) {
368 in++;
369 }
370
371 // TODO(edisonn): report error
372 if (in >= end) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000373 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000374 out++;
375 break;
376 }
377
378 if (*in == kClosedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000379 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000380 out++;
edisonn@google.com1acab362013-07-25 22:03:22 +0000381 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000382 break;
383 }
384
385 switch (*in) {
386 case '0':
387 case '1':
388 case '2':
389 case '3':
390 case '4':
391 case '5':
392 case '6':
393 case '7':
394 case '8':
395 case '9':
396 code += (*in - '0');
397 break;
398
399 case 'a':
400 case 'b':
401 case 'c':
402 case 'd':
403 case 'e':
404 case 'f':
405 code += (*in - 'a' + 10);
406 break;
407
408 case 'A':
409 case 'B':
410 case 'C':
411 case 'D':
412 case 'E':
413 case 'F':
414 code += (*in - 'A' + 10);
415 break;
416
417 // TODO(edisonn): spec does not say how to handle this error
418 default:
419 break;
420 }
421
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000422 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000423 out++;
424 in++;
425 }
426
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000427 if (hasOut) {
428 return in; // consumed already > at the end of the string
429 } else {
430 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000431 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000432}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000433
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000434static int readHexStringLength(const unsigned char* start, const unsigned char* end) {
435 return readHexString(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000436}
437
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000438static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000439 if (!allocator) {
440 return end;
441 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000442 int outLength = readHexStringLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000443 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
444 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000445 const unsigned char* now = readHexString(start, end, out);
446 SkPdfNativeObject::makeHexString(out, out + outLength, str);
447 // str PUT_TRACK_STREAM(start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000448 TRACE_HEXSTRING(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000449 return now; // consumed already > at the end of the string
edisonn@google.com571c70b2013-07-10 17:09:50 +0000450}
451
452// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000453static const unsigned char* readName(const unsigned char* start, const unsigned char* end, unsigned char* out) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000454 bool hasOut = (out != NULL);
455 const unsigned char* in = start;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000456
457 unsigned char code = 0;
458
459 while (in < end) {
460 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
461 break;
462 }
463
464 if (*in == '#' && in + 2 < end) {
465 in++;
466 switch (*in) {
467 case '0':
468 case '1':
469 case '2':
470 case '3':
471 case '4':
472 case '5':
473 case '6':
474 case '7':
475 case '8':
476 case '9':
477 code = (*in - '0') << 4;
478 break;
479
480 case 'a':
481 case 'b':
482 case 'c':
483 case 'd':
484 case 'e':
485 case 'f':
486 code = (*in - 'a' + 10) << 4;
487 break;
488
489 case 'A':
490 case 'B':
491 case 'C':
492 case 'D':
493 case 'E':
494 case 'F':
495 code = (*in - 'A' + 10) << 4;
496 break;
497
498 // TODO(edisonn): spec does not say how to handle this error
499 default:
500 break;
501 }
502
503 in++; // advance
504
505 switch (*in) {
506 case '0':
507 case '1':
508 case '2':
509 case '3':
510 case '4':
511 case '5':
512 case '6':
513 case '7':
514 case '8':
515 case '9':
516 code += (*in - '0');
517 break;
518
519 case 'a':
520 case 'b':
521 case 'c':
522 case 'd':
523 case 'e':
524 case 'f':
525 code += (*in - 'a' + 10);
526 break;
527
528 case 'A':
529 case 'B':
530 case 'C':
531 case 'D':
532 case 'E':
533 case 'F':
534 code += (*in - 'A' + 10);
535 break;
536
537 // TODO(edisonn): spec does not say how to handle this error
538 default:
539 break;
540 }
541
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000542 if (hasOut) { *out = code; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000543 out++;
544 in++;
545 } else {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000546 if (hasOut) { *out = *in; }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000547 out++;
548 in++;
549 }
550 }
551
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000552 if (hasOut) {
553 return in;
554 } else {
555 return start + (out - (const unsigned char*)NULL); // return where the string would end if we reuse the string
556 }
557}
558
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000559static int readNameLength(const unsigned char* start, const unsigned char* end) {
560 return readName(start, end, NULL) - start;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000561}
562
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000563static const unsigned char* readName(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* name, SkPdfAllocator* allocator) {
edisonn@google.comb44334c2013-07-23 20:47:05 +0000564 if (!allocator) {
565 return end;
566 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000567 int outLength = readNameLength(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000568 // TODO(edisonn): optimize the allocation, don't allocate new string, but put it in a preallocated buffer
569 unsigned char* out = (unsigned char*)allocator->alloc(outLength);
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000570 const unsigned char* now = readName(start, end, out);
571 SkPdfNativeObject::makeName(out, out + outLength, name);
572 //PUT_TRACK_STREAM(start, now)
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000573 TRACE_NAME(out, out + outLength);
edisonn@google.combca421b2013-09-05 20:00:21 +0000574 return now;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000575}
576
// TODO(edisonn): the pdf spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains endstream, together
// with a reference object with the length, but the real length object would be somewhere else.
// It could confuse the parser.
/*example:

7 0 obj
<< /length 8 0 R>>
stream
...............
endstream
8 0 obj #we are in stream actually, not a real object
<< 10 >> #we are in stream actually, not a real object
endobj
endstream
8 0 obj #real obj
<< 100 >> #real obj
endobj
and it could get worse, with multiple objects like this
*/

// Right now we implement the silly algorithm that assumes endstream finishes the stream.
599
600
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000601static const unsigned char* readStream(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
602 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000603 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
604 // no stream. return.
605 return start;
606 }
607
608 start += 6; // strlen("stream")
609 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
610 start += 2;
611 } else if (start[0] == kLF_PdfWhiteSpace) {
612 start += 1;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000613 } else if (isPdfWhiteSpace(start[0])) {
614 start += 1;
615 } else {
616 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
617 // TODO(edisonn): warning?
edisonn@google.com571c70b2013-07-10 17:09:50 +0000618 }
619
620 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
621 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000622 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000623
624 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000625 if (stream->has_Length() && stream->Length(doc) > 0) {
626 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000627 }
628
629 // TODO(edisonn): laod external streams
630 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
631
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000632
633 if (length >= 0) {
634 const unsigned char* endstream = start + length;
635
636 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
637 endstream += 2;
638 } else if (endstream[0] == kLF_PdfWhiteSpace) {
639 endstream += 1;
640 }
641
642 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
643 length = -1;
644 }
645 }
646
edisonn@google.com571c70b2013-07-10 17:09:50 +0000647 if (length < 0) {
648 // scan the buffer, until we find first endstream
649 // TODO(edisonn): all buffers must have a 0 at the end now,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000650 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "endstream");
edisonn@google.com571c70b2013-07-10 17:09:50 +0000651
652 if (endstream) {
653 length = endstream - start;
654 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
edisonn@google.com78b38b12013-07-15 18:20:58 +0000655 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000656 }
657 }
658 if (length >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000659 const unsigned char* endstream = start + length;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000660
661 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
662 endstream += 2;
663 } else if (endstream[0] == kLF_PdfWhiteSpace) {
664 endstream += 1;
665 }
666
667 // TODO(edisonn): verify the next bytes are "endstream"
668
669 endstream += strlen("endstream");
670 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000671 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000672 return endstream;
673 }
674 return start;
675}
676
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000677static const unsigned char* readInlineImageStream(const unsigned char* start, const unsigned char* end, SkPdfImageDictionary* inlineImage, SkPdfNativeDoc* doc) {
edisonn@google.com78b38b12013-07-15 18:20:58 +0000678 // We already processed ID keyword, and we should be positioned immediately after it
679
680 // TODO(edisonn): security: read after end check, or make buffers with extra 2 bytes
681 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
682 start += 2;
683 } else if (start[0] == kLF_PdfWhiteSpace) {
684 start += 1;
685 } else if (isPdfWhiteSpace(start[0])) {
686 start += 1;
687 } else {
688 SkASSERT(isPdfDelimiter(start[0]));
689 // TODO(edisonn): warning?
690 }
691
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000692 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
693 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI")
edisonn@google.com78b38b12013-07-15 18:20:58 +0000694
695 if (endstream) {
696 int length = endstream - start;
697 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
698 if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
699 inlineImage->addStream(start, (size_t)length);
700 } else {
701 // TODO(edisonn): report error in inline image stream (ID-EI) section
702 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
703 return end;
704 }
705 return endEI;
706}
707
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000708static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* dict, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com1f080162013-07-23 21:05:49 +0000709 if (allocator == NULL) {
710 // TODO(edisonn): report/warning error
711 return end;
712 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000713 SkPdfNativeObject::makeEmptyDictionary(dict);
714 // PUT_TRACK_STREAM(dict, start, start)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000715
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000716 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000717 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them immediately after set.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000718
719 while (start < end && *start == kNamed_PdfDelimiter) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000720 SkPdfNativeObject key;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000721 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000722 start++;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000723 start = readName(start, end, &key, &tmpStorage);
724 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000725
726 if (start < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000727 SkPdfNativeObject* value = allocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000728 start = nextObject(start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000729
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000730 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000731
732 if (start < end) {
733 // seems we have an indirect reference
734 if (isPdfDigit(*start)) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000735 SkPdfNativeObject generation;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000736 start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000737
edisonn@google.com3aa35552013-08-14 18:26:20 +0000738 SkPdfNativeObject keywordR;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000739 start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000740
741 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
742 int64_t id = value->intValue();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000743 SkPdfNativeObject::resetAndMakeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
744 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000745 dict->set(&key, value);
746 } else {
747 // error, ignore
748 dict->set(&key, value);
749 }
750 } else {
751 // next elem is not a digit, but it might not be / either!
752 dict->set(&key, value);
753 }
754 } else {
755 // /key >>
756 dict->set(&key, value);
757 return end;
758 }
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000759 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000760 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000761 dict->set(&key, &SkPdfNativeObject::kNull);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000762 return end;
763 }
764 }
765
766 // TODO(edisonn): options to ignore these errors
767
768 // now we should expect >>
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000769 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000770 if (*start != kClosedInequityBracket_PdfDelimiter) {
771 // TODO(edisonn): report/warning
772 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000773 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000774 start++; // skip >
775 if (*start != kClosedInequityBracket_PdfDelimiter) {
776 // TODO(edisonn): report/warning
777 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000778 //*start = '\0';
edisonn@google.com78b38b12013-07-15 18:20:58 +0000779 start++; // skip >
edisonn@google.com571c70b2013-07-10 17:09:50 +0000780
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000781 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
edisonn@google.combca421b2013-09-05 20:00:21 +0000782
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000783 start = readStream(start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000784
785 return start;
786}
787
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000788const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000789 const unsigned char* current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000790
791 // skip white spaces
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000792 start = skipPdfWhiteSpaces(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000793
edisonn@google.comaf54a512013-09-13 19:33:42 +0000794 if (start >= end) {
795 return end;
796 }
797
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000798 current = endOfPdfToken(start, end);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000799
800 // no token, len would be 0
edisonn@google.comaf54a512013-09-13 19:33:42 +0000801 if (current == start || current == end) {
802 return end;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000803 }
804
805 int tokenLen = current - start;
806
807 if (tokenLen == 1) {
808 // start array
809 switch (*start) {
810 case kOpenedSquareBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000811 //*start = '\0';
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000812 return readArray(current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000813
814 case kOpenedRoundBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000815 //*start = '\0';
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000816 return readString(start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000817
818 case kOpenedInequityBracket_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000819 //*start = '\0';
edisonn@google.com571c70b2013-07-10 17:09:50 +0000820 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000821 //start[1] = '\0'; // optional
edisonn@google.com571c70b2013-07-10 17:09:50 +0000822 // TODO(edisonn): pass here the length somehow?
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000823 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000824 } else {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000825 return readHexString(start + 1, end, token, allocator); // skip <
edisonn@google.com571c70b2013-07-10 17:09:50 +0000826 }
827
828 case kNamed_PdfDelimiter:
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000829 //*start = '\0';
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000830 return readName(start + 1, end, token, allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000831
832 // TODO(edisonn): what to do curly brackets? read spec!
833 case kOpenedCurlyBracket_PdfDelimiter:
834 default:
835 break;
836 }
837
838 SkASSERT(!isPdfWhiteSpace(*start));
839 if (isPdfDelimiter(*start)) {
840 // TODO(edisonn): how stream ] } > ) will be handled?
841 // for now ignore, and it will become a keyword to be ignored
842 }
843 }
844
845 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000846 SkPdfNativeObject::makeNull(token);
847 // PUT_TRACK_STREAM(start, start + 4)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000848 return current;
849 }
850
851 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000852 SkPdfNativeObject::makeBoolean(true, token);
853 // PUT_TRACK_STREAM(start, start + 4)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000854 return current;
855 }
856
edisonn@google.comf111a4b2013-07-31 18:22:36 +0000857 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[4] == 'e') {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000858 SkPdfNativeObject::makeBoolean(false, token);
859 // PUT_TRACK_STREAM(start, start + 5)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000860 return current;
861 }
862
863 if (isPdfNumeric(*start)) {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000864 SkPdfNativeObject::makeNumeric(start, current, token);
865 // PUT_TRACK_STREAM(start, current)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000866 } else {
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000867 SkPdfNativeObject::makeKeyword(start, current, token);
868 // PUT_TRACK_STREAM(start, current)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000869 }
870 return current;
871}
872
edisonn@google.com3aa35552013-08-14 18:26:20 +0000873SkPdfNativeObject* SkPdfAllocator::allocBlock() {
874 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
875 return new SkPdfNativeObject[BUFFER_SIZE];
edisonn@google.com571c70b2013-07-10 17:09:50 +0000876}
877
878SkPdfAllocator::~SkPdfAllocator() {
879 for (int i = 0 ; i < fHandles.count(); i++) {
880 free(fHandles[i]);
881 }
882 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000883 for (int j = 0 ; j < BUFFER_SIZE; j++) {
884 fHistory[i][j].reset();
885 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000886 delete[] fHistory[i];
887 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000888 for (int j = 0 ; j < BUFFER_SIZE; j++) {
889 fCurrent[j].reset();
890 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000891 delete[] fCurrent;
892}
893
edisonn@google.com3aa35552013-08-14 18:26:20 +0000894SkPdfNativeObject* SkPdfAllocator::allocObject() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000895 if (fCurrentUsed >= BUFFER_SIZE) {
896 fHistory.push(fCurrent);
897 fCurrent = allocBlock();
898 fCurrentUsed = 0;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000899 fSizeInBytes += sizeof(SkPdfNativeObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000900 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000901 fCurrentUsed++;
902 return &fCurrent[fCurrentUsed - 1];
903}
904
905// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com33f11b62013-08-14 21:35:27 +0000906SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000907 const unsigned char* buffer = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000908 size_t len = 0;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000909 objWithStream->GetFilteredStreamRef(&buffer, &len);
edisonn@google.com222382b2013-07-10 22:33:10 +0000910 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000911 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000912 if (endobj) {
913 len = endobj - (char*)buffer + strlen("endobj");
914 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000915 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000916 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000917}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000918
edisonn@google.com33f11b62013-08-14 21:35:27 +0000919SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc) : fDoc(doc), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000920 // TODO(edisonn): hack, find end of object
edisonn@google.com78b38b12013-07-15 18:20:58 +0000921 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
edisonn@google.com222382b2013-07-10 22:33:10 +0000922 if (endobj) {
923 len = endobj - (char*)buffer + strlen("endobj");
924 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000925 fUncompressedStreamStart = fUncompressedStream = buffer;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000926 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000927}
928
929SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000930}
931
932bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000933 SkPdfNativeObject obj;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000934#ifdef PDF_TRACE_READ_TOKEN
935 static int read_op = 0;
edisonn@google.com91ce6982013-08-05 20:45:40 +0000936#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000937 token->fKeyword = NULL;
938 token->fObject = NULL;
939
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000940 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000941 if (fUncompressedStream >= fUncompressedStreamEnd) {
942 return false;
943 }
944
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000945 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
946 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)
edisonn@google.com571c70b2013-07-10 17:09:50 +0000947
948 // If it is a keyword, we will only get the pointer of the string
edisonn@google.com3aa35552013-08-14 18:26:20 +0000949 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000950 token->fKeyword = obj.c_str();
edisonn@google.come878e722013-07-29 19:10:58 +0000951 token->fKeywordLength = obj.lenstr();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000952 token->fType = kKeyword_TokenType;
953 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000954 SkPdfNativeObject* pobj = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000955 *pobj = obj;
956 token->fObject = pobj;
957 token->fType = kObject_TokenType;
958 }
959
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000960#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000961 read_op++;
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000962#if 0
edisonn@google.com222382b2013-07-10 22:33:10 +0000963 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000964 printf("break;\n");
965 }
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000966#endif
edisonn@google.come91260c2013-09-04 17:29:06 +0000967 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000968#endif
969
970 return true;
971}
972
973void SkPdfNativeTokenizer::PutBack(PdfToken token) {
974 SkASSERT(!fHasPutBack);
975 fHasPutBack = true;
976 fPutBack = token;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000977#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.come91260c2013-09-04 17:29:06 +0000978 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000979#endif
980}
981
982bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
983 if (fHasPutBack) {
984 *token = fPutBack;
985 fHasPutBack = false;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000986#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.come91260c2013-09-04 17:29:06 +0000987 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000988#endif
989 return true;
990 }
991
992 if (fEmpty) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000993#ifdef PDF_TRACE_READ_TOKEN
edisonn@google.com571c70b2013-07-10 17:09:50 +0000994 printf("EMPTY TOKENIZER\n");
995#endif
996 return false;
997 }
998
999 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001000}
edisonn@google.com78b38b12013-07-15 18:20:58 +00001001
1002#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
1003
1004// keys
1005DECLARE_PDF_NAME(BitsPerComponent);
1006DECLARE_PDF_NAME(ColorSpace);
1007DECLARE_PDF_NAME(Decode);
1008DECLARE_PDF_NAME(DecodeParms);
1009DECLARE_PDF_NAME(Filter);
1010DECLARE_PDF_NAME(Height);
1011DECLARE_PDF_NAME(ImageMask);
1012DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abreviations?
1013DECLARE_PDF_NAME(Interpolate);
1014DECLARE_PDF_NAME(Width);
1015
1016// values
1017DECLARE_PDF_NAME(DeviceGray);
1018DECLARE_PDF_NAME(DeviceRGB);
1019DECLARE_PDF_NAME(DeviceCMYK);
1020DECLARE_PDF_NAME(Indexed);
1021DECLARE_PDF_NAME(ASCIIHexDecode);
1022DECLARE_PDF_NAME(ASCII85Decode);
1023DECLARE_PDF_NAME(LZWDecode);
1024DECLARE_PDF_NAME(FlateDecode); // PDF 1.2
1025DECLARE_PDF_NAME(RunLengthDecode);
1026DECLARE_PDF_NAME(CCITTFaxDecode);
1027DECLARE_PDF_NAME(DCTDecode);
1028
1029#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
1030
1031
edisonn@google.com3aa35552013-08-14 18:26:20 +00001032static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001033 if (!key || !key->isName()) {
1034 return key;
1035 }
1036
1037 // TODO(edisonn): use autogenerated code!
1038 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
1039 HANDLE_NAME_ABBR(key, ColorSpace, CS);
1040 HANDLE_NAME_ABBR(key, Decode, D);
1041 HANDLE_NAME_ABBR(key, DecodeParms, DP);
1042 HANDLE_NAME_ABBR(key, Filter, F);
1043 HANDLE_NAME_ABBR(key, Height, H);
1044 HANDLE_NAME_ABBR(key, ImageMask, IM);
1045// HANDLE_NAME_ABBR(key, Intent, );
1046 HANDLE_NAME_ABBR(key, Interpolate, I);
1047 HANDLE_NAME_ABBR(key, Width, W);
1048
1049 return key;
1050}
1051
edisonn@google.com3aa35552013-08-14 18:26:20 +00001052static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
edisonn@google.com78b38b12013-07-15 18:20:58 +00001053 if (!value || !value->isName()) {
1054 return value;
1055 }
1056
1057 // TODO(edisonn): use autogenerated code!
1058 HANDLE_NAME_ABBR(value, DeviceGray, G);
1059 HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
1060 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
1061 HANDLE_NAME_ABBR(value, Indexed, I);
1062 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
1063 HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
1064 HANDLE_NAME_ABBR(value, LZWDecode, LZW);
1065 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2)
1066 HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
1067 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
1068 HANDLE_NAME_ABBR(value, DCTDecode, DCT);
1069
1070 return value;
1071}
1072
1073SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
1074 // BI already processed
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001075 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001076 if (fUncompressedStream >= fUncompressedStreamEnd) {
1077 return NULL;
1078 }
1079
1080 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001081 SkPdfNativeObject::makeEmptyDictionary(inlineImage);
1082 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart, fUncompressedStream - fUncompressedStreamStart)
edisonn@google.com78b38b12013-07-15 18:20:58 +00001083
1084 while (fUncompressedStream < fUncompressedStreamEnd) {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001085 SkPdfNativeObject* key = fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001086 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, fAllocator, fDoc);
1087 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
edisonn@google.com78b38b12013-07-15 18:20:58 +00001088
edisonn@google.come878e722013-07-29 19:10:58 +00001089 if (key->isKeyword() && key->lenstr() == 2 && key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001090 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, inlineImage, fDoc);
edisonn@google.com78b38b12013-07-15 18:20:58 +00001091 return inlineImage;
1092 } else {
edisonn@google.com3aa35552013-08-14 18:26:20 +00001093 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +00001094 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, fAllocator, fDoc);
1095 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
edisonn@google.com78b38b12013-07-15 18:20:58 +00001096 // TODO(edisonn): perf maybe we should not expand abreviation like this
1097 inlineImage->set(inlineImageKeyAbbreviationExpand(key),
1098 inlineImageValueAbbreviationExpand(obj));
1099 }
1100 }
1101 // TODO(edisonn): report end of data with inline image without an EI
1102 return inlineImage;
1103}