blob: 128813297aa9d77a52b817194169f09f67ebb340 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
edisonn@google.coma3356fc2013-07-10 18:20:06 +00008static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +00009 while (start < end && isPdfWhiteSpace(*start)) {
10 if (*start == kComment_PdfDelimiter) {
11 // skip the comment until end of line
12 while (start < end && !isPdfEOL(*start)) {
13 *start = '\0';
14 start++;
15 }
16 } else {
17 *start = '\0';
18 start++;
19 }
20 }
21 return start;
22}
23
24// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.coma3356fc2013-07-10 18:20:06 +000025static unsigned char* endOfPdfToken(unsigned char* start, unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000026 //int opened brackets
27 //TODO(edisonn): what out for special chars, like \n, \032
28
29 SkASSERT(!isPdfWhiteSpace(*start));
30
31 if (start < end && isPdfDelimiter(*start)) {
32 start++;
33 return start;
34 }
35
36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
37 start++;
38 }
39 return start;
40}
41
edisonn@google.com571c70b2013-07-10 17:09:50 +000042// last elem has to be ]
edisonn@google.com951d6532013-07-10 23:17:31 +000043static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000044 while (start < end) {
45 // skip white spaces
46 start = skipPdfWhiteSpaces(start, end);
47
48 unsigned char* endOfToken = endOfPdfToken(start, end);
49
50 if (endOfToken == start) {
51 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
52 return start;
53 }
54
55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
56 return endOfToken;
57 }
58
59 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com951d6532013-07-10 23:17:31 +000060 start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +000061 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
62 // we are sure they are not references!
63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
64 SkPdfObject* gen = array->removeLastInArray();
65 SkPdfObject* id = array->removeLastInArray();
66 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +000067 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +000068 }
69 array->appendInArray(newObj);
70 }
71 // TODO(edisonn): report not reached, we should never get here
72 SkASSERT(false);
73 return start;
74}
75
76// When we read strings we will rewrite the string so we will reuse the memory
77// when we start to read the string, we already consumed the opened bracket
edisonn@google.coma3356fc2013-07-10 18:20:06 +000078static unsigned char* readString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000079 unsigned char* out = start;
80 unsigned char* in = start;
81
82 int openRoundBrackets = 0;
83 while (in < end && (*in != kClosedRoundBracket_PdfDelimiter || openRoundBrackets > 0)) {
84 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
85 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
86 if (*in == kEscape_PdfSpecial) {
87 if (in + 1 < end) {
88 switch (in[1]) {
89 case 'n':
90 *out = kLF_PdfWhiteSpace;
91 out++;
92 in += 2;
93 break;
94
95 case 'r':
96 *out = kCR_PdfWhiteSpace;
97 out++;
98 in += 2;
99 break;
100
101 case 't':
102 *out = kHT_PdfWhiteSpace;
103 out++;
104 in += 2;
105 break;
106
107 case 'b':
108 // TODO(edisonn): any special meaning to backspace?
109 *out = kBackspace_PdfSpecial;
110 out++;
111 in += 2;
112 break;
113
114 case 'f':
115 *out = kFF_PdfWhiteSpace;
116 out++;
117 in += 2;
118 break;
119
120 case kOpenedRoundBracket_PdfDelimiter:
121 *out = kOpenedRoundBracket_PdfDelimiter;
122 out++;
123 in += 2;
124 break;
125
126 case kClosedRoundBracket_PdfDelimiter:
127 *out = kClosedRoundBracket_PdfDelimiter;
128 out++;
129 in += 2;
130 break;
131
132 case kEscape_PdfSpecial:
133 *out = kEscape_PdfSpecial;
134 out++;
135 in += 2;
136 break;
137
138 case '0':
139 case '1':
140 case '2':
141 case '3':
142 case '4':
143 case '5':
144 case '6':
145 case '7': {
146 //read octals
147 in++; // consume backslash
148
149 int code = 0;
150 int i = 0;
151 while (in < end && *in >= '0' && *in < '8') {
152 code = (code << 3) + ((*in) - '0'); // code * 8 + d
153 i++;
154 in++;
155 if (i == 3) {
156 *out = code & 0xff;
157 out++;
158 i = 0;
159 }
160 }
161 if (i > 0) {
162 *out = code & 0xff;
163 out++;
164 }
165 }
166 break;
167
168 default:
169 // Per spec, backslash is ignored is escaped ch is unknown
170 in++;
171 break;
172 }
173 }
174 } else {
175 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
176 // we could have one look that first just inc current, and when we find the backslash
177 // we go to this loop
178 *in = *out;
179 in++;
180 out++;
181 }
182 }
183
184
185 SkPdfObject::makeString(start, out, str);
186 return in + 1; // consume ) at the end of the string
187}
188
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000189static unsigned char* readHexString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000190 unsigned char* out = start;
191 unsigned char* in = start;
192
193 unsigned char code = 0;
194
195 while (in < end) {
196 while (in < end && isPdfWhiteSpace(*in)) {
197 in++;
198 }
199
200 if (*in == kClosedInequityBracket_PdfDelimiter) {
201 *in = '\0';
202 in++;
203 // normal exit
204 break;
205 }
206
207 if (in >= end) {
208 // end too soon
209 break;
210 }
211
212 switch (*in) {
213 case '0':
214 case '1':
215 case '2':
216 case '3':
217 case '4':
218 case '5':
219 case '6':
220 case '7':
221 case '8':
222 case '9':
223 code = (*in - '0') << 4;
224 break;
225
226 case 'a':
227 case 'b':
228 case 'c':
229 case 'd':
230 case 'e':
231 case 'f':
232 code = (*in - 'a' + 10) << 4;
233 break;
234
235 case 'A':
236 case 'B':
237 case 'C':
238 case 'D':
239 case 'E':
240 case 'F':
241 code = (*in - 'A' + 10) << 4;
242 break;
243
244 // TODO(edisonn): spec does not say how to handle this error
245 default:
246 break;
247 }
248
249 in++; // advance
250
251 while (in < end && isPdfWhiteSpace(*in)) {
252 in++;
253 }
254
255 // TODO(edisonn): report error
256 if (in >= end) {
257 *out = code;
258 out++;
259 break;
260 }
261
262 if (*in == kClosedInequityBracket_PdfDelimiter) {
263 *out = code;
264 out++;
265 break;
266 }
267
268 switch (*in) {
269 case '0':
270 case '1':
271 case '2':
272 case '3':
273 case '4':
274 case '5':
275 case '6':
276 case '7':
277 case '8':
278 case '9':
279 code += (*in - '0');
280 break;
281
282 case 'a':
283 case 'b':
284 case 'c':
285 case 'd':
286 case 'e':
287 case 'f':
288 code += (*in - 'a' + 10);
289 break;
290
291 case 'A':
292 case 'B':
293 case 'C':
294 case 'D':
295 case 'E':
296 case 'F':
297 code += (*in - 'A' + 10);
298 break;
299
300 // TODO(edisonn): spec does not say how to handle this error
301 default:
302 break;
303 }
304
305 *out = code;
306 out++;
307 in++;
308 }
309
310 if (out < in) {
311 *out = '\0';
312 }
313
314 SkPdfObject::makeHexString(start, out, str);
315 return in; // consume > at the end of the string
316}
317
318// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000319static unsigned char* readName(unsigned char* start, unsigned char* end, SkPdfObject* name) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000320 unsigned char* out = start;
321 unsigned char* in = start;
322
323 unsigned char code = 0;
324
325 while (in < end) {
326 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
327 break;
328 }
329
330 if (*in == '#' && in + 2 < end) {
331 in++;
332 switch (*in) {
333 case '0':
334 case '1':
335 case '2':
336 case '3':
337 case '4':
338 case '5':
339 case '6':
340 case '7':
341 case '8':
342 case '9':
343 code = (*in - '0') << 4;
344 break;
345
346 case 'a':
347 case 'b':
348 case 'c':
349 case 'd':
350 case 'e':
351 case 'f':
352 code = (*in - 'a' + 10) << 4;
353 break;
354
355 case 'A':
356 case 'B':
357 case 'C':
358 case 'D':
359 case 'E':
360 case 'F':
361 code = (*in - 'A' + 10) << 4;
362 break;
363
364 // TODO(edisonn): spec does not say how to handle this error
365 default:
366 break;
367 }
368
369 in++; // advance
370
371 switch (*in) {
372 case '0':
373 case '1':
374 case '2':
375 case '3':
376 case '4':
377 case '5':
378 case '6':
379 case '7':
380 case '8':
381 case '9':
382 code += (*in - '0');
383 break;
384
385 case 'a':
386 case 'b':
387 case 'c':
388 case 'd':
389 case 'e':
390 case 'f':
391 code += (*in - 'a' + 10);
392 break;
393
394 case 'A':
395 case 'B':
396 case 'C':
397 case 'D':
398 case 'E':
399 case 'F':
400 code += (*in - 'A' + 10);
401 break;
402
403 // TODO(edisonn): spec does not say how to handle this error
404 default:
405 break;
406 }
407
408 *out = code;
409 out++;
410 in++;
411 } else {
412 *out = *in;
413 out++;
414 in++;
415 }
416 }
417
418 SkPdfObject::makeName(start, out, name);
419 return in;
420}
421
// TODO(edisonn): the PDF spec lets Length be an indirect object that is defined after the stream.
// That makes for an interesting scenario, where the stream data itself contains "endstream",
// together with a fake object carrying a length, while the real length object is somewhere else.
// This could confuse the parser.
426/*example:
427
4287 0 obj
429<< /length 8 0 R>>
430stream
431...............
432endstream
4338 0 obj #we are in stream actually, not a real object
434<< 10 >> #we are in stream actually, not a real object
435endobj
436endstream
4378 0 obj #real obj
438<< 100 >> #real obj
439endobj
440and it could get worse, with multiple object like this
441*/
442
443// right now implement the silly algorithm that assumes endstream is finishing the stream
444
445
edisonn@google.com951d6532013-07-10 23:17:31 +0000446static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000447 start = skipPdfWhiteSpaces(start, end);
448 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
449 // no stream. return.
450 return start;
451 }
452
453 start += 6; // strlen("stream")
454 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
455 start += 2;
456 } else if (start[0] == kLF_PdfWhiteSpace) {
457 start += 1;
458 }
459
460 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
461 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000462 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000463
464 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000465 if (stream->has_Length() && stream->Length(doc) > 0) {
466 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000467 }
468
469 // TODO(edisonn): laod external streams
470 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
471
472 if (length < 0) {
473 // scan the buffer, until we find first endstream
474 // TODO(edisonn): all buffers must have a 0 at the end now,
475 // TODO(edisonn): hack (mark end of content with 0)
476 unsigned char lastCh = *end;
477 *end = '\0';
478 //SkASSERT(*end == '\0');
479 unsigned char* endstream = (unsigned char*)strstr((const char*)start, "endstream");
480 *end = lastCh;
481
482 if (endstream) {
483 length = endstream - start;
484 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
485 if (*(endstream-1) == kCR_PdfWhiteSpace) length--;
486 }
487 }
488 if (length >= 0) {
489 unsigned char* endstream = start + length;
490
491 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
492 endstream += 2;
493 } else if (endstream[0] == kLF_PdfWhiteSpace) {
494 endstream += 1;
495 }
496
497 // TODO(edisonn): verify the next bytes are "endstream"
498
499 endstream += strlen("endstream");
500 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000501 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000502 return endstream;
503 }
504 return start;
505}
506
edisonn@google.com951d6532013-07-10 23:17:31 +0000507static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000508 SkPdfObject::makeEmptyDictionary(dict);
509
510 start = skipPdfWhiteSpaces(start, end);
511
512 while (start < end && *start == kNamed_PdfDelimiter) {
513 SkPdfObject key;
514 *start = '\0';
515 start++;
516 start = readName(start, end, &key);
517 start = skipPdfWhiteSpaces(start, end);
518
519 if (start < end) {
520 SkPdfObject* value = allocator->allocObject();
edisonn@google.com951d6532013-07-10 23:17:31 +0000521 start = nextObject(start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000522
523 start = skipPdfWhiteSpaces(start, end);
524
525 if (start < end) {
526 // seems we have an indirect reference
527 if (isPdfDigit(*start)) {
528 SkPdfObject generation;
edisonn@google.com951d6532013-07-10 23:17:31 +0000529 start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000530
531 SkPdfObject keywordR;
edisonn@google.com951d6532013-07-10 23:17:31 +0000532 start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000533
534 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
535 int64_t id = value->intValue();
536 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000537 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000538 dict->set(&key, value);
539 } else {
540 // error, ignore
541 dict->set(&key, value);
542 }
543 } else {
544 // next elem is not a digit, but it might not be / either!
545 dict->set(&key, value);
546 }
547 } else {
548 // /key >>
549 dict->set(&key, value);
550 return end;
551 }
552 start = skipPdfWhiteSpaces(start, end);
553 } else {
554 dict->set(&key, &SkPdfObject::kNull);
555 return end;
556 }
557 }
558
559 // TODO(edisonn): options to ignore these errors
560
561 // now we should expect >>
562 start = skipPdfWhiteSpaces(start, end);
563 start = endOfPdfToken(start, end); // >
564 start = endOfPdfToken(start, end); // >
565
566 // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...
567 // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?
568
edisonn@google.com951d6532013-07-10 23:17:31 +0000569 start = readStream(start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000570
571 return start;
572}
573
edisonn@google.com951d6532013-07-10 23:17:31 +0000574unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000575 unsigned char* current;
576
577 // skip white spaces
578 start = skipPdfWhiteSpaces(start, end);
579
580 current = endOfPdfToken(start, end);
581
582 // no token, len would be 0
583 if (current == start) {
584 return NULL;
585 }
586
587 int tokenLen = current - start;
588
589 if (tokenLen == 1) {
590 // start array
591 switch (*start) {
592 case kOpenedSquareBracket_PdfDelimiter:
593 *start = '\0';
594 SkPdfObject::makeEmptyArray(token);
edisonn@google.com951d6532013-07-10 23:17:31 +0000595 return readArray(current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000596
597 case kOpenedRoundBracket_PdfDelimiter:
598 *start = '\0';
599 return readString(start, end, token);
600
601 case kOpenedInequityBracket_PdfDelimiter:
602 *start = '\0';
603 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
604 // TODO(edisonn): pass here the length somehow?
edisonn@google.com951d6532013-07-10 23:17:31 +0000605 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000606 } else {
607 return readHexString(start + 1, end, token); // skip <
608 }
609
610 case kNamed_PdfDelimiter:
611 *start = '\0';
612 return readName(start + 1, end, token);
613
614 // TODO(edisonn): what to do curly brackets? read spec!
615 case kOpenedCurlyBracket_PdfDelimiter:
616 default:
617 break;
618 }
619
620 SkASSERT(!isPdfWhiteSpace(*start));
621 if (isPdfDelimiter(*start)) {
622 // TODO(edisonn): how stream ] } > ) will be handled?
623 // for now ignore, and it will become a keyword to be ignored
624 }
625 }
626
627 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
628 SkPdfObject::makeNull(token);
629 return current;
630 }
631
632 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
633 SkPdfObject::makeBoolean(true, token);
634 return current;
635 }
636
637 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
638 SkPdfObject::makeBoolean(false, token);
639 return current;
640 }
641
642 if (isPdfNumeric(*start)) {
643 SkPdfObject::makeNumeric(start, current, token);
644 } else {
645 SkPdfObject::makeKeyword(start, current, token);
646 }
647 return current;
648}
649
650SkPdfObject* SkPdfAllocator::allocBlock() {
651 return new SkPdfObject[BUFFER_SIZE];
652}
653
654SkPdfAllocator::~SkPdfAllocator() {
655 for (int i = 0 ; i < fHandles.count(); i++) {
656 free(fHandles[i]);
657 }
658 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000659 for (int j = 0 ; j < BUFFER_SIZE; j++) {
660 fHistory[i][j].reset();
661 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000662 delete[] fHistory[i];
663 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000664 for (int j = 0 ; j < BUFFER_SIZE; j++) {
665 fCurrent[j].reset();
666 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000667 delete[] fCurrent;
668}
669
670SkPdfObject* SkPdfAllocator::allocObject() {
671 if (fCurrentUsed >= BUFFER_SIZE) {
672 fHistory.push(fCurrent);
673 fCurrent = allocBlock();
674 fCurrentUsed = 0;
675 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000676 fCurrentUsed++;
677 return &fCurrent[fCurrentUsed - 1];
678}
679
680// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000681SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000682 unsigned char* buffer = NULL;
683 size_t len = 0;
684 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);
edisonn@google.com222382b2013-07-10 22:33:10 +0000685 // TODO(edisonn): hack, find end of object
686 char* endobj = strstr((char*)buffer, "endobj");
687 if (endobj) {
688 len = endobj - (char*)buffer + strlen("endobj");
689 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000690 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
691 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000692 memcpy(fUncompressedStream, buffer, len);
693}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000694
edisonn@google.com951d6532013-07-10 23:17:31 +0000695SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000696 // TODO(edisonn): hack, find end of object
697 char* endobj = strstr((char*)buffer, "endobj");
698 if (endobj) {
699 len = endobj - (char*)buffer + strlen("endobj");
700 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000701 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
702 fUncompressedStreamEnd = fUncompressedStream + len;
703 memcpy(fUncompressedStream, buffer, len);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000704}
705
// Intentionally empty: the tokenizer releases nothing itself.
// NOTE(review): the stream copy comes from fAllocator->alloc(); presumably the allocator
// frees it -- confirm against SkPdfAllocator.
SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
}
708
709bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
710 token->fKeyword = NULL;
711 token->fObject = NULL;
712
713 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
714 if (fUncompressedStream >= fUncompressedStreamEnd) {
715 return false;
716 }
717
718 SkPdfObject obj;
edisonn@google.com951d6532013-07-10 23:17:31 +0000719 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000720
721 // If it is a keyword, we will only get the pointer of the string
722 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
723 token->fKeyword = obj.c_str();
724 token->fKeywordLength = obj.len();
725 token->fType = kKeyword_TokenType;
726 } else {
727 SkPdfObject* pobj = fAllocator->allocObject();
728 *pobj = obj;
729 token->fObject = pobj;
730 token->fType = kObject_TokenType;
731 }
732
733#ifdef PDF_TRACE
734 static int read_op = 0;
735 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000736 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000737 printf("break;\n");
738 }
739 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
740#endif
741
742 return true;
743}
744
745void SkPdfNativeTokenizer::PutBack(PdfToken token) {
746 SkASSERT(!fHasPutBack);
747 fHasPutBack = true;
748 fPutBack = token;
749#ifdef PDF_TRACE
750 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
751#endif
752}
753
754bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
755 if (fHasPutBack) {
756 *token = fPutBack;
757 fHasPutBack = false;
758#ifdef PDF_TRACE
759 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
760#endif
761 return true;
762 }
763
764 if (fEmpty) {
765#ifdef PDF_TRACE
766 printf("EMPTY TOKENIZER\n");
767#endif
768 return false;
769 }
770
771 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000772}
edisonn@google.com222382b2013-07-10 22:33:10 +0000773