blob: de49e35f11039428987ecb573986329f61c243e3 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001
2#include "SkPdfNativeTokenizer.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00003#include "SkPdfObject.h"
4#include "SkPdfConfig.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00005
edisonn@google.com571c70b2013-07-10 17:09:50 +00006#include "SkPdfStreamCommonDictionary_autogen.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +00007
edisonn@google.coma3356fc2013-07-10 18:20:06 +00008static unsigned char* skipPdfWhiteSpaces(unsigned char* start, unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +00009 while (start < end && isPdfWhiteSpace(*start)) {
10 if (*start == kComment_PdfDelimiter) {
11 // skip the comment until end of line
12 while (start < end && !isPdfEOL(*start)) {
13 *start = '\0';
14 start++;
15 }
16 } else {
17 *start = '\0';
18 start++;
19 }
20 }
21 return start;
22}
23
24// TODO(edisonn) '(' can be used, will it break the string a delimiter or space inside () ?
edisonn@google.coma3356fc2013-07-10 18:20:06 +000025static unsigned char* endOfPdfToken(unsigned char* start, unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000026 //int opened brackets
27 //TODO(edisonn): what out for special chars, like \n, \032
28
29 SkASSERT(!isPdfWhiteSpace(*start));
30
31 if (start < end && isPdfDelimiter(*start)) {
32 start++;
33 return start;
34 }
35
36 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
37 start++;
38 }
39 return start;
40}
41
edisonn@google.com571c70b2013-07-10 17:09:50 +000042// last elem has to be ]
edisonn@google.com951d6532013-07-10 23:17:31 +000043static unsigned char* readArray(unsigned char* start, unsigned char* end, SkPdfObject* array, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000044 while (start < end) {
45 // skip white spaces
46 start = skipPdfWhiteSpaces(start, end);
47
48 unsigned char* endOfToken = endOfPdfToken(start, end);
49
50 if (endOfToken == start) {
51 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
52 return start;
53 }
54
55 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
56 return endOfToken;
57 }
58
59 SkPdfObject* newObj = allocator->allocObject();
edisonn@google.com951d6532013-07-10 23:17:31 +000060 start = nextObject(start, end, newObj, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +000061 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array only when
62 // we are sure they are not references!
63 if (newObj->isKeywordReference() && array->size() >= 2 && array->objAtAIndex(array->size() - 1)->isInteger() && array->objAtAIndex(array->size() - 2)->isInteger()) {
64 SkPdfObject* gen = array->removeLastInArray();
65 SkPdfObject* id = array->removeLastInArray();
66 newObj->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +000067 SkPdfObject::makeReference((unsigned int)id->intValue(), (unsigned int)gen->intValue(), newObj);
edisonn@google.com571c70b2013-07-10 17:09:50 +000068 }
69 array->appendInArray(newObj);
70 }
71 // TODO(edisonn): report not reached, we should never get here
edisonn@google.com8bad7372013-07-10 23:36:56 +000072 // TODO(edisonn): there might be a bug here, enable an assert and run it on files
73 // or it might be that the files were actually corrupted
edisonn@google.com571c70b2013-07-10 17:09:50 +000074 return start;
75}
76
77// When we read strings we will rewrite the string so we will reuse the memory
78// when we start to read the string, we already consumed the opened bracket
edisonn@google.coma3356fc2013-07-10 18:20:06 +000079static unsigned char* readString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000080 unsigned char* out = start;
81 unsigned char* in = start;
82
83 int openRoundBrackets = 0;
84 while (in < end && (*in != kClosedRoundBracket_PdfDelimiter || openRoundBrackets > 0)) {
85 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
86 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
87 if (*in == kEscape_PdfSpecial) {
88 if (in + 1 < end) {
89 switch (in[1]) {
90 case 'n':
91 *out = kLF_PdfWhiteSpace;
92 out++;
93 in += 2;
94 break;
95
96 case 'r':
97 *out = kCR_PdfWhiteSpace;
98 out++;
99 in += 2;
100 break;
101
102 case 't':
103 *out = kHT_PdfWhiteSpace;
104 out++;
105 in += 2;
106 break;
107
108 case 'b':
109 // TODO(edisonn): any special meaning to backspace?
110 *out = kBackspace_PdfSpecial;
111 out++;
112 in += 2;
113 break;
114
115 case 'f':
116 *out = kFF_PdfWhiteSpace;
117 out++;
118 in += 2;
119 break;
120
121 case kOpenedRoundBracket_PdfDelimiter:
122 *out = kOpenedRoundBracket_PdfDelimiter;
123 out++;
124 in += 2;
125 break;
126
127 case kClosedRoundBracket_PdfDelimiter:
128 *out = kClosedRoundBracket_PdfDelimiter;
129 out++;
130 in += 2;
131 break;
132
133 case kEscape_PdfSpecial:
134 *out = kEscape_PdfSpecial;
135 out++;
136 in += 2;
137 break;
138
139 case '0':
140 case '1':
141 case '2':
142 case '3':
143 case '4':
144 case '5':
145 case '6':
146 case '7': {
147 //read octals
148 in++; // consume backslash
149
150 int code = 0;
151 int i = 0;
152 while (in < end && *in >= '0' && *in < '8') {
153 code = (code << 3) + ((*in) - '0'); // code * 8 + d
154 i++;
155 in++;
156 if (i == 3) {
157 *out = code & 0xff;
158 out++;
159 i = 0;
160 }
161 }
162 if (i > 0) {
163 *out = code & 0xff;
164 out++;
165 }
166 }
167 break;
168
169 default:
170 // Per spec, backslash is ignored is escaped ch is unknown
171 in++;
172 break;
173 }
edisonn@google.com8bad7372013-07-10 23:36:56 +0000174 } else {
175 in++;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000176 }
177 } else {
178 // TODO(edisonn): perf, avoid copy into itself, maybe first do a simple scan until found backslash ?
179 // we could have one look that first just inc current, and when we find the backslash
180 // we go to this loop
181 *in = *out;
182 in++;
183 out++;
184 }
185 }
186
187
188 SkPdfObject::makeString(start, out, str);
189 return in + 1; // consume ) at the end of the string
190}
191
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000192static unsigned char* readHexString(unsigned char* start, unsigned char* end, SkPdfObject* str) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000193 unsigned char* out = start;
194 unsigned char* in = start;
195
196 unsigned char code = 0;
197
198 while (in < end) {
199 while (in < end && isPdfWhiteSpace(*in)) {
200 in++;
201 }
202
203 if (*in == kClosedInequityBracket_PdfDelimiter) {
204 *in = '\0';
205 in++;
206 // normal exit
207 break;
208 }
209
210 if (in >= end) {
211 // end too soon
212 break;
213 }
214
215 switch (*in) {
216 case '0':
217 case '1':
218 case '2':
219 case '3':
220 case '4':
221 case '5':
222 case '6':
223 case '7':
224 case '8':
225 case '9':
226 code = (*in - '0') << 4;
227 break;
228
229 case 'a':
230 case 'b':
231 case 'c':
232 case 'd':
233 case 'e':
234 case 'f':
235 code = (*in - 'a' + 10) << 4;
236 break;
237
238 case 'A':
239 case 'B':
240 case 'C':
241 case 'D':
242 case 'E':
243 case 'F':
244 code = (*in - 'A' + 10) << 4;
245 break;
246
247 // TODO(edisonn): spec does not say how to handle this error
248 default:
249 break;
250 }
251
252 in++; // advance
253
254 while (in < end && isPdfWhiteSpace(*in)) {
255 in++;
256 }
257
258 // TODO(edisonn): report error
259 if (in >= end) {
260 *out = code;
261 out++;
262 break;
263 }
264
265 if (*in == kClosedInequityBracket_PdfDelimiter) {
266 *out = code;
267 out++;
268 break;
269 }
270
271 switch (*in) {
272 case '0':
273 case '1':
274 case '2':
275 case '3':
276 case '4':
277 case '5':
278 case '6':
279 case '7':
280 case '8':
281 case '9':
282 code += (*in - '0');
283 break;
284
285 case 'a':
286 case 'b':
287 case 'c':
288 case 'd':
289 case 'e':
290 case 'f':
291 code += (*in - 'a' + 10);
292 break;
293
294 case 'A':
295 case 'B':
296 case 'C':
297 case 'D':
298 case 'E':
299 case 'F':
300 code += (*in - 'A' + 10);
301 break;
302
303 // TODO(edisonn): spec does not say how to handle this error
304 default:
305 break;
306 }
307
308 *out = code;
309 out++;
310 in++;
311 }
312
313 if (out < in) {
314 *out = '\0';
315 }
316
317 SkPdfObject::makeHexString(start, out, str);
318 return in; // consume > at the end of the string
319}
320
321// TODO(edisonn): before PDF 1.2 name could not have special characters, add version parameter
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000322static unsigned char* readName(unsigned char* start, unsigned char* end, SkPdfObject* name) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000323 unsigned char* out = start;
324 unsigned char* in = start;
325
326 unsigned char code = 0;
327
328 while (in < end) {
329 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
330 break;
331 }
332
333 if (*in == '#' && in + 2 < end) {
334 in++;
335 switch (*in) {
336 case '0':
337 case '1':
338 case '2':
339 case '3':
340 case '4':
341 case '5':
342 case '6':
343 case '7':
344 case '8':
345 case '9':
346 code = (*in - '0') << 4;
347 break;
348
349 case 'a':
350 case 'b':
351 case 'c':
352 case 'd':
353 case 'e':
354 case 'f':
355 code = (*in - 'a' + 10) << 4;
356 break;
357
358 case 'A':
359 case 'B':
360 case 'C':
361 case 'D':
362 case 'E':
363 case 'F':
364 code = (*in - 'A' + 10) << 4;
365 break;
366
367 // TODO(edisonn): spec does not say how to handle this error
368 default:
369 break;
370 }
371
372 in++; // advance
373
374 switch (*in) {
375 case '0':
376 case '1':
377 case '2':
378 case '3':
379 case '4':
380 case '5':
381 case '6':
382 case '7':
383 case '8':
384 case '9':
385 code += (*in - '0');
386 break;
387
388 case 'a':
389 case 'b':
390 case 'c':
391 case 'd':
392 case 'e':
393 case 'f':
394 code += (*in - 'a' + 10);
395 break;
396
397 case 'A':
398 case 'B':
399 case 'C':
400 case 'D':
401 case 'E':
402 case 'F':
403 code += (*in - 'A' + 10);
404 break;
405
406 // TODO(edisonn): spec does not say how to handle this error
407 default:
408 break;
409 }
410
411 *out = code;
412 out++;
413 in++;
414 } else {
415 *out = *in;
416 out++;
417 in++;
418 }
419 }
420
421 SkPdfObject::makeName(start, out, name);
422 return in;
423}
424
// TODO(edisonn): the PDF spec lets Length be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream itself contains "endstream",
// together with an object that looks like the referenced length, while the real length object
// is somewhere else. This could confuse the parser.
/* Example:

7 0 obj
<< /length 8 0 R>>
stream
...............
endstream
8 0 obj #we are in stream actually, not a real object
<< 10 >> #we are in stream actually, not a real object
endobj
endstream
8 0 obj #real obj
<< 100 >> #real obj
endobj

And it could get worse, with multiple objects like this.
*/

// Right now we implement the silly algorithm that assumes endstream is finishing the stream.
447
448
edisonn@google.com951d6532013-07-10 23:17:31 +0000449static unsigned char* readStream(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000450 start = skipPdfWhiteSpaces(start, end);
451 if (!(start[0] == 's' && start[1] == 't' && start[2] == 'r' && start[3] == 'e' && start[4] == 'a' && start[5] == 'm')) {
452 // no stream. return.
453 return start;
454 }
455
456 start += 6; // strlen("stream")
457 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
458 start += 2;
459 } else if (start[0] == kLF_PdfWhiteSpace) {
460 start += 1;
461 }
462
463 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
464 // TODO(edisonn): load Length
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000465 int64_t length = -1;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000466
467 // TODO(edisonn): very basic implementation
edisonn@google.com951d6532013-07-10 23:17:31 +0000468 if (stream->has_Length() && stream->Length(doc) > 0) {
469 length = stream->Length(doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000470 }
471
472 // TODO(edisonn): laod external streams
473 // TODO(edisonn): look at the last filter, to determione how to deal with possible issue
474
475 if (length < 0) {
476 // scan the buffer, until we find first endstream
477 // TODO(edisonn): all buffers must have a 0 at the end now,
478 // TODO(edisonn): hack (mark end of content with 0)
479 unsigned char lastCh = *end;
480 *end = '\0';
481 //SkASSERT(*end == '\0');
482 unsigned char* endstream = (unsigned char*)strstr((const char*)start, "endstream");
483 *end = lastCh;
484
485 if (endstream) {
486 length = endstream - start;
487 if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
488 if (*(endstream-1) == kCR_PdfWhiteSpace) length--;
489 }
490 }
491 if (length >= 0) {
492 unsigned char* endstream = start + length;
493
494 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
495 endstream += 2;
496 } else if (endstream[0] == kLF_PdfWhiteSpace) {
497 endstream += 1;
498 }
499
500 // TODO(edisonn): verify the next bytes are "endstream"
501
502 endstream += strlen("endstream");
503 // TODO(edisonn): Assert? report error/warning?
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000504 dict->addStream(start, (size_t)length);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000505 return endstream;
506 }
507 return start;
508}
509
edisonn@google.com951d6532013-07-10 23:17:31 +0000510static unsigned char* readDictionary(unsigned char* start, unsigned char* end, SkPdfObject* dict, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000511 SkPdfObject::makeEmptyDictionary(dict);
512
513 start = skipPdfWhiteSpaces(start, end);
514
515 while (start < end && *start == kNamed_PdfDelimiter) {
516 SkPdfObject key;
517 *start = '\0';
518 start++;
519 start = readName(start, end, &key);
520 start = skipPdfWhiteSpaces(start, end);
521
522 if (start < end) {
523 SkPdfObject* value = allocator->allocObject();
edisonn@google.com951d6532013-07-10 23:17:31 +0000524 start = nextObject(start, end, value, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000525
526 start = skipPdfWhiteSpaces(start, end);
527
528 if (start < end) {
529 // seems we have an indirect reference
530 if (isPdfDigit(*start)) {
531 SkPdfObject generation;
edisonn@google.com951d6532013-07-10 23:17:31 +0000532 start = nextObject(start, end, &generation, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000533
534 SkPdfObject keywordR;
edisonn@google.com951d6532013-07-10 23:17:31 +0000535 start = nextObject(start, end, &keywordR, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000536
537 if (value->isInteger() && generation.isInteger() && keywordR.isKeywordReference()) {
538 int64_t id = value->intValue();
539 value->reset();
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000540 SkPdfObject::makeReference((unsigned int)id, (unsigned int)generation.intValue(), value);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000541 dict->set(&key, value);
542 } else {
543 // error, ignore
544 dict->set(&key, value);
545 }
546 } else {
547 // next elem is not a digit, but it might not be / either!
548 dict->set(&key, value);
549 }
550 } else {
551 // /key >>
552 dict->set(&key, value);
553 return end;
554 }
555 start = skipPdfWhiteSpaces(start, end);
556 } else {
557 dict->set(&key, &SkPdfObject::kNull);
558 return end;
559 }
560 }
561
562 // TODO(edisonn): options to ignore these errors
563
564 // now we should expect >>
565 start = skipPdfWhiteSpaces(start, end);
566 start = endOfPdfToken(start, end); // >
567 start = endOfPdfToken(start, end); // >
568
569 // TODO(edisonn): read stream ... put dict and stream in a struct, and have a pointer to struct ...
570 // or alocate 2 objects, and if there is no stream, free it to be used by someone else? or just leave it ?
571
edisonn@google.com951d6532013-07-10 23:17:31 +0000572 start = readStream(start, end, dict, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000573
574 return start;
575}
576
edisonn@google.com951d6532013-07-10 23:17:31 +0000577unsigned char* nextObject(unsigned char* start, unsigned char* end, SkPdfObject* token, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000578 unsigned char* current;
579
580 // skip white spaces
581 start = skipPdfWhiteSpaces(start, end);
582
583 current = endOfPdfToken(start, end);
584
585 // no token, len would be 0
586 if (current == start) {
587 return NULL;
588 }
589
590 int tokenLen = current - start;
591
592 if (tokenLen == 1) {
593 // start array
594 switch (*start) {
595 case kOpenedSquareBracket_PdfDelimiter:
596 *start = '\0';
597 SkPdfObject::makeEmptyArray(token);
edisonn@google.com951d6532013-07-10 23:17:31 +0000598 return readArray(current, end, token, allocator, doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000599
600 case kOpenedRoundBracket_PdfDelimiter:
601 *start = '\0';
602 return readString(start, end, token);
603
604 case kOpenedInequityBracket_PdfDelimiter:
605 *start = '\0';
606 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
607 // TODO(edisonn): pass here the length somehow?
edisonn@google.com951d6532013-07-10 23:17:31 +0000608 return readDictionary(start + 2, end, token, allocator, doc); // skip <<
edisonn@google.com571c70b2013-07-10 17:09:50 +0000609 } else {
610 return readHexString(start + 1, end, token); // skip <
611 }
612
613 case kNamed_PdfDelimiter:
614 *start = '\0';
615 return readName(start + 1, end, token);
616
617 // TODO(edisonn): what to do curly brackets? read spec!
618 case kOpenedCurlyBracket_PdfDelimiter:
619 default:
620 break;
621 }
622
623 SkASSERT(!isPdfWhiteSpace(*start));
624 if (isPdfDelimiter(*start)) {
625 // TODO(edisonn): how stream ] } > ) will be handled?
626 // for now ignore, and it will become a keyword to be ignored
627 }
628 }
629
630 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
631 SkPdfObject::makeNull(token);
632 return current;
633 }
634
635 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
636 SkPdfObject::makeBoolean(true, token);
637 return current;
638 }
639
640 if (tokenLen == 5 && start[0] == 'f' && start[1] == 'a' && start[2] == 'l' && start[3] == 's' && start[3] == 'e') {
641 SkPdfObject::makeBoolean(false, token);
642 return current;
643 }
644
645 if (isPdfNumeric(*start)) {
646 SkPdfObject::makeNumeric(start, current, token);
647 } else {
648 SkPdfObject::makeKeyword(start, current, token);
649 }
650 return current;
651}
652
653SkPdfObject* SkPdfAllocator::allocBlock() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000654 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfObject);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000655 return new SkPdfObject[BUFFER_SIZE];
656}
657
658SkPdfAllocator::~SkPdfAllocator() {
659 for (int i = 0 ; i < fHandles.count(); i++) {
660 free(fHandles[i]);
661 }
662 for (int i = 0 ; i < fHistory.count(); i++) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000663 for (int j = 0 ; j < BUFFER_SIZE; j++) {
664 fHistory[i][j].reset();
665 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000666 delete[] fHistory[i];
667 }
edisonn@google.com222382b2013-07-10 22:33:10 +0000668 for (int j = 0 ; j < BUFFER_SIZE; j++) {
669 fCurrent[j].reset();
670 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000671 delete[] fCurrent;
672}
673
674SkPdfObject* SkPdfAllocator::allocObject() {
675 if (fCurrentUsed >= BUFFER_SIZE) {
676 fHistory.push(fCurrent);
677 fCurrent = allocBlock();
678 fCurrentUsed = 0;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000679 fSizeInBytes += sizeof(SkPdfObject*);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000680 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000681 fCurrentUsed++;
682 return &fCurrent[fCurrentUsed - 1];
683}
684
685// TODO(edisonn): perf: do no copy the buffers, but use them, and mark cache the result, so there is no need of a second pass
edisonn@google.com951d6532013-07-10 23:17:31 +0000686SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fUncompressedStream(NULL), fUncompressedStreamEnd(NULL), fEmpty(false), fHasPutBack(false) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000687 unsigned char* buffer = NULL;
688 size_t len = 0;
689 objWithStream->GetFilteredStreamRef(&buffer, &len, fAllocator);
edisonn@google.com222382b2013-07-10 22:33:10 +0000690 // TODO(edisonn): hack, find end of object
691 char* endobj = strstr((char*)buffer, "endobj");
692 if (endobj) {
693 len = endobj - (char*)buffer + strlen("endobj");
694 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000695 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
696 fUncompressedStreamEnd = fUncompressedStream + len;
edisonn@google.com222382b2013-07-10 22:33:10 +0000697 memcpy(fUncompressedStream, buffer, len);
698}
edisonn@google.com571c70b2013-07-10 17:09:50 +0000699
edisonn@google.com951d6532013-07-10 23:17:31 +0000700SkPdfNativeTokenizer::SkPdfNativeTokenizer(unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkNativeParsedPDF* doc) : fDoc(doc), fMapper(mapper), fAllocator(allocator), fEmpty(false), fHasPutBack(false) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000701 // TODO(edisonn): hack, find end of object
702 char* endobj = strstr((char*)buffer, "endobj");
703 if (endobj) {
704 len = endobj - (char*)buffer + strlen("endobj");
705 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000706 fUncompressedStreamStart = fUncompressedStream = (unsigned char*)fAllocator->alloc(len);
707 fUncompressedStreamEnd = fUncompressedStream + len;
708 memcpy(fUncompressedStream, buffer, len);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000709}
710
711SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000712}
713
714bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
715 token->fKeyword = NULL;
716 token->fObject = NULL;
717
718 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
719 if (fUncompressedStream >= fUncompressedStreamEnd) {
720 return false;
721 }
722
723 SkPdfObject obj;
edisonn@google.com951d6532013-07-10 23:17:31 +0000724 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000725
726 // If it is a keyword, we will only get the pointer of the string
727 if (obj.type() == SkPdfObject::kKeyword_PdfObjectType) {
728 token->fKeyword = obj.c_str();
729 token->fKeywordLength = obj.len();
730 token->fType = kKeyword_TokenType;
731 } else {
732 SkPdfObject* pobj = fAllocator->allocObject();
733 *pobj = obj;
734 token->fObject = pobj;
735 token->fType = kObject_TokenType;
736 }
737
738#ifdef PDF_TRACE
739 static int read_op = 0;
740 read_op++;
edisonn@google.com222382b2013-07-10 22:33:10 +0000741 if (548 == read_op) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000742 printf("break;\n");
743 }
744 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
745#endif
746
747 return true;
748}
749
750void SkPdfNativeTokenizer::PutBack(PdfToken token) {
751 SkASSERT(!fHasPutBack);
752 fHasPutBack = true;
753 fPutBack = token;
754#ifdef PDF_TRACE
755 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", token.fKeyword ? std::string(token.fKeyword, token.fKeywordLength).c_str(): token.fObject->toString().c_str());
756#endif
757}
758
759bool SkPdfNativeTokenizer::readToken(PdfToken* token) {
760 if (fHasPutBack) {
761 *token = fPutBack;
762 fHasPutBack = false;
763#ifdef PDF_TRACE
764 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", token->fKeyword ? std::string(token->fKeyword, token->fKeywordLength).c_str() : token->fObject->toString().c_str());
765#endif
766 return true;
767 }
768
769 if (fEmpty) {
770#ifdef PDF_TRACE
771 printf("EMPTY TOKENIZER\n");
772#endif
773 return false;
774 }
775
776 return readTokenCore(token);
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000777}