blob: 53169ca369941b7a78f329b377850f1ce0b0d6c8 [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkPdfNativeTokenizer_DEFINED
9#define SkPdfNativeTokenizer_DEFINED
edisonn@google.com3aac1f92013-07-02 22:42:53 +000010
edisonn@google.combca421b2013-09-05 20:00:21 +000011#include "SkPdfConfig.h"
12
edisonn@google.com571c70b2013-07-10 17:09:50 +000013#include "SkTDArray.h"
14#include "SkTDict.h"
15#include <math.h>
16#include <string.h>
17
edisonn@google.com571c70b2013-07-10 17:09:50 +000018class SkPdfDictionary;
edisonn@google.com78b38b12013-07-15 18:20:58 +000019class SkPdfImageDictionary;
edisonn@google.com571c70b2013-07-10 17:09:50 +000020
// ---------------------------------------------------------------------------
// Character constants.
// ---------------------------------------------------------------------------

// White-space characters, spelled as explicit byte values.
#define kNUL_PdfWhiteSpace '\x00'
#define kHT_PdfWhiteSpace  '\x09'
#define kLF_PdfWhiteSpace  '\x0A'
#define kFF_PdfWhiteSpace  '\x0C'
#define kCR_PdfWhiteSpace  '\x0D'
#define kSP_PdfWhiteSpace  '\x20'

// Delimiter characters.
#define kOpenedRoundBracket_PdfDelimiter    '('
#define kClosedRoundBracket_PdfDelimiter    ')'
#define kOpenedInequityBracket_PdfDelimiter '<'
#define kClosedInequityBracket_PdfDelimiter '>'
#define kOpenedSquareBracket_PdfDelimiter   '['
#define kClosedSquareBracket_PdfDelimiter   ']'
#define kOpenedCurlyBracket_PdfDelimiter    '{'
#define kClosedCurlyBracket_PdfDelimiter    '}'
#define kNamed_PdfDelimiter                 '/'
#define kComment_PdfDelimiter               '%'

// Other special characters.
#define kEscape_PdfSpecial    '\\'
#define kBackspace_PdfSpecial '\x08'

// ---------------------------------------------------------------------------
// Character classification.
//
// NOTE: these are macros, so `ch` is evaluated more than once; do not pass an
// expression with side effects (e.g. `*p++`).
//
// TODO(edisonn): which form is fastest for a given compiler/machine? It may
// differ from one machine to another, so all options should be measured:
//   1) expand the expression and let the compiler optimize it (current form)
//   2) binary search
//   3) linear search in an array
//   4) lookup table (e.g. T type[256]; return type[ch];)
//   5) hand-build the expression with the fewest operators, e.g. for
//      consecutive chars compare while ignoring the last bit
// ---------------------------------------------------------------------------

// True when `ch` is one of the six white-space characters above.
#define isPdfWhiteSpace(ch)            \
    ((ch) == kNUL_PdfWhiteSpace ||     \
     (ch) == kHT_PdfWhiteSpace  ||     \
     (ch) == kLF_PdfWhiteSpace  ||     \
     (ch) == kFF_PdfWhiteSpace  ||     \
     (ch) == kCR_PdfWhiteSpace  ||     \
     (ch) == kSP_PdfWhiteSpace)

// True when `ch` is a line feed or a carriage return.
#define isPdfEOL(ch) \
    ((ch) == kLF_PdfWhiteSpace || (ch) == kCR_PdfWhiteSpace)

// True when `ch` is one of the ten delimiter characters above.
#define isPdfDelimiter(ch)                            \
    ((ch) == kOpenedRoundBracket_PdfDelimiter    ||   \
     (ch) == kClosedRoundBracket_PdfDelimiter    ||   \
     (ch) == kOpenedInequityBracket_PdfDelimiter ||   \
     (ch) == kClosedInequityBracket_PdfDelimiter ||   \
     (ch) == kOpenedSquareBracket_PdfDelimiter   ||   \
     (ch) == kClosedSquareBracket_PdfDelimiter   ||   \
     (ch) == kOpenedCurlyBracket_PdfDelimiter    ||   \
     (ch) == kClosedCurlyBracket_PdfDelimiter    ||   \
     (ch) == kNamed_PdfDelimiter                 ||   \
     (ch) == kComment_PdfDelimiter)

// True when `ch` is either white space or a delimiter.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) \
    (isPdfWhiteSpace(ch) || isPdfDelimiter(ch))

// True when `ch` is a decimal digit.
#define isPdfDigit(ch) ((ch) >= '0' && (ch) <= '9')

// True when `ch` is a digit, a sign, or a decimal point.
#define isPdfNumeric(ch) \
    (isPdfDigit(ch) || (ch) == '+' || (ch) == '-' || (ch) == '.')
edisonn@google.com571c70b2013-07-10 17:09:50 +000072
// Low-level scanning helpers; both are defined out of line.
// NOTE(review): the contracts below are inferred from the names and
// signatures only -- confirm against the implementation.
//   - [buffer, end) / [start, end) presumably delimit the bytes to scan.
//   - `level` is presumably the current nesting/recursion depth.

// Presumably returns a pointer past any leading PDF white space in `buffer`.
const unsigned char* skipPdfWhiteSpaces(int level,
                                        const unsigned char* buffer,
                                        const unsigned char* end);

// Presumably returns a pointer to the end of the token beginning at `start`.
const unsigned char* endOfPdfToken(int level,
                                   const unsigned char* start,
                                   const unsigned char* end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000075
// TODO(edisonn): typedef the real and integer types? It might make the code less readable...
//typedef double SkPdfReal;
//typedef int64_t SkPdfInteger;

// An allocator only allocates memory, and it deletes all of it when the allocator is destroyed.
// This would allow us to avoid any garbage collection while we parse or draw a pdf, and defer it
// until the user is looking at the image.
83
edisonn@google.com3aa35552013-08-14 18:26:20 +000084class SkPdfNativeObject;
edisonn@google.com571c70b2013-07-10 17:09:50 +000085
86class SkPdfAllocator {
87#define BUFFER_SIZE 1024
edisonn@google.com3aa35552013-08-14 18:26:20 +000088 SkTDArray<SkPdfNativeObject*> fHistory;
edisonn@google.com571c70b2013-07-10 17:09:50 +000089 SkTDArray<void*> fHandles;
edisonn@google.com3aa35552013-08-14 18:26:20 +000090 SkPdfNativeObject* fCurrent;
edisonn@google.com571c70b2013-07-10 17:09:50 +000091 int fCurrentUsed;
92
edisonn@google.com3aa35552013-08-14 18:26:20 +000093 SkPdfNativeObject* allocBlock();
edisonn@google.coma5aaa792013-07-11 12:27:21 +000094 size_t fSizeInBytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +000095
edisonn@google.com3aac1f92013-07-02 22:42:53 +000096public:
edisonn@google.com571c70b2013-07-10 17:09:50 +000097 SkPdfAllocator() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +000098 fSizeInBytes = sizeof(*this);
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 fCurrent = allocBlock();
100 fCurrentUsed = 0;
101 }
102
103 ~SkPdfAllocator();
104
edisonn@google.com3aa35552013-08-14 18:26:20 +0000105 SkPdfNativeObject* allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000106
107 // TODO(edisonn): free this memory in destructor, track the usage?
108 void* alloc(size_t bytes) {
109 void* data = malloc(bytes);
110 fHandles.push(data);
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000111 fSizeInBytes += bytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000112 return data;
113 }
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000114
edisonn@google.com7b328fd2013-07-11 12:53:06 +0000115 size_t bytesUsed() const {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000116 return fSizeInBytes;
117 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000118};
119
edisonn@google.com3aa35552013-08-14 18:26:20 +0000120class SkPdfNativeDoc;
edisonn@google.combca421b2013-09-05 20:00:21 +0000121const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc GET_TRACK_STREAM);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000122
// Kind of a PdfToken.
// NOTE(review): presumably selects which PdfToken fields are meaningful
// (fKeyword/fKeywordLength vs. fObject) -- confirm against the tokenizer.
enum SkPdfTokenType {
    kKeyword_TokenType,
    kObject_TokenType,
};

// Harmless re-declaration so that PdfToken does not depend on declaration
// order within this header.
class SkPdfNativeObject;

// A single token: either a keyword (pointer + length) or an object.
struct PdfToken {
    const char*        fKeyword;
    size_t             fKeywordLength;
    SkPdfNativeObject* fObject;
    SkPdfTokenType     fType;

    // Initializes every field. fType used to be left uninitialized, so reading
    // it from a default-constructed token was an indeterminate value; it now
    // defaults to kKeyword_TokenType.
    PdfToken()
        : fKeyword(NULL)
        , fKeywordLength(0)
        , fObject(NULL)
        , fType(kKeyword_TokenType) {}
};
136
137class SkPdfNativeTokenizer {
138public:
edisonn@google.com33f11b62013-08-14 21:35:27 +0000139 SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
140 SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000141
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000142 virtual ~SkPdfNativeTokenizer();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000143
144 bool readToken(PdfToken* token);
145 bool readTokenCore(PdfToken* token);
146 void PutBack(PdfToken token);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000147 SkPdfImageDictionary* readInlineImage();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000148
149private:
edisonn@google.com3aa35552013-08-14 18:26:20 +0000150 SkPdfNativeDoc* fDoc;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000151 SkPdfAllocator* fAllocator;
152
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000153 const unsigned char* fUncompressedStreamStart;
154 const unsigned char* fUncompressedStream;
155 const unsigned char* fUncompressedStreamEnd;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000156
157 bool fEmpty;
158 bool fHasPutBack;
159 PdfToken fPutBack;
edisonn@google.combca421b2013-09-05 20:00:21 +0000160
161#ifdef PDF_TRACK_STREAM_OFFSETS
162 int fStreamId;
163#endif // PDF_TRACK_STREAM_OFFSETS
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000164};
165
edisonn@google.comcf2cfa12013-08-21 16:31:37 +0000166#endif // SkPdfNativeTokenizer_DEFINED