edisonn@google.com | cf2cfa1 | 2013-08-21 16:31:37 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2013 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkPdfNativeTokenizer_DEFINED |
| 9 | #define SkPdfNativeTokenizer_DEFINED |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 10 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 11 | #include <math.h> |
| 12 | #include <string.h> |
| 13 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 14 | #include "SkPdfConfig.h" |
| 15 | #include "SkTDArray.h" |
| 16 | #include "SkTDict.h" |
| 17 | |
| 18 | // All these constants are defined by the PDF 1.4 Spec. |
| 19 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 20 | class SkPdfDictionary; |
edisonn@google.com | 78b38b1 | 2013-07-15 18:20:58 +0000 | [diff] [blame] | 21 | class SkPdfImageDictionary; |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 22 | class SkPdfNativeDoc; |
| 23 | class SkPdfNativeObject; |
| 24 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 25 | |
// ---------------------------------------------------------------------------------------------
// Character classes used by the tokenizer.
//
// NOTE: every isPdf*() helper below is a macro and may evaluate its argument more than once.
// Never pass an expression with side effects (e.g. *p++).
// ---------------------------------------------------------------------------------------------

// White-space characters.
#define kNUL_PdfWhiteSpace '\x00'   // null
#define kHT_PdfWhiteSpace  '\x09'   // horizontal tab
#define kLF_PdfWhiteSpace  '\x0A'   // line feed
#define kFF_PdfWhiteSpace  '\x0C'   // form feed
#define kCR_PdfWhiteSpace  '\x0D'   // carriage return
#define kSP_PdfWhiteSpace  '\x20'   // space

// Delimiter characters.
#define kOpenedRoundBracket_PdfDelimiter    '('
#define kClosedRoundBracket_PdfDelimiter    ')'
#define kOpenedInequityBracket_PdfDelimiter '<'
#define kClosedInequityBracket_PdfDelimiter '>'
#define kOpenedSquareBracket_PdfDelimiter   '['
#define kClosedSquareBracket_PdfDelimiter   ']'
#define kOpenedCurlyBracket_PdfDelimiter    '{'
#define kClosedCurlyBracket_PdfDelimiter    '}'
#define kNamed_PdfDelimiter                 '/'
#define kComment_PdfDelimiter               '%'

// Other special characters.
#define kEscape_PdfSpecial    '\\'
#define kBackspace_PdfSpecial '\x08'

// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
// expressions? We should evaluate all options; the answer might even differ from one machine to
// another:
//   1) expand the expression and let the compiler optimize it
//   2) binary search
//   3) linear search in an array
//   4) lookup table (e.g. T type[256] ... return type[ch])
//   5) manually build the expression with the least number of operators, e.g. for consecutive
//      chars we can use a binary equal ignoring the last bit

// True when ch is one of the six white-space characters defined above.
#define isPdfWhiteSpace(ch) (((ch) == kNUL_PdfWhiteSpace) || \
                             ((ch) == kHT_PdfWhiteSpace)  || \
                             ((ch) == kLF_PdfWhiteSpace)  || \
                             ((ch) == kFF_PdfWhiteSpace)  || \
                             ((ch) == kCR_PdfWhiteSpace)  || \
                             ((ch) == kSP_PdfWhiteSpace))

// True when ch is a line feed or a carriage return.
#define isPdfEOL(ch) (((ch) == kLF_PdfWhiteSpace) || ((ch) == kCR_PdfWhiteSpace))

// True when ch is one of the ten delimiter characters defined above.
#define isPdfDelimiter(ch) (((ch) == kOpenedRoundBracket_PdfDelimiter)    || \
                            ((ch) == kClosedRoundBracket_PdfDelimiter)    || \
                            ((ch) == kOpenedInequityBracket_PdfDelimiter) || \
                            ((ch) == kClosedInequityBracket_PdfDelimiter) || \
                            ((ch) == kOpenedSquareBracket_PdfDelimiter)   || \
                            ((ch) == kClosedSquareBracket_PdfDelimiter)   || \
                            ((ch) == kOpenedCurlyBracket_PdfDelimiter)    || \
                            ((ch) == kClosedCurlyBracket_PdfDelimiter)    || \
                            ((ch) == kNamed_PdfDelimiter)                 || \
                            ((ch) == kComment_PdfDelimiter))

// True when ch is either a white-space character or a delimiter.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch) || isPdfDelimiter(ch))

// True when ch is an ASCII decimal digit.
#define isPdfDigit(ch) ((ch) >= '0' && (ch) <= '9')

// True when ch is a digit, a sign ('+' or '-') or a decimal point.
#define isPdfNumeric(ch) (isPdfDigit(ch) || (ch) == '+' || (ch) == '-' || (ch) == '.')
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 82 | |
// Scanning helpers over a byte range bounded by `end`. Both are only declared here; their
// definitions live outside this header.
// NOTE(review): judging by the names, skipPdfWhiteSpaces() advances past leading white space and
// endOfPdfToken() finds where the token beginning at `start` stops - confirm against the
// implementation, including what is returned when `end` is reached.
const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end);
const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end);

// NOTE(review): not referenced anywhere in this header; presumably a scratch-buffer size used by
// the implementation - confirm before changing.
#define BUFFER_SIZE 1024
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 87 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 88 | /** \class SkPdfAllocator |
| 89 | * |
| 90 | * An allocator only allocates memory, and it deletes it all when the allocator is destroyed. |
| 91 | * This strategy would allow us not to do any garbage collection while we parse and/or render |
| 92 | * a pdf. |
| 93 | * |
| 94 | */ |
| 95 | class SkPdfAllocator { |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 96 | public: |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 97 | SkPdfAllocator() { |
edisonn@google.com | a5aaa79 | 2013-07-11 12:27:21 +0000 | [diff] [blame] | 98 | fSizeInBytes = sizeof(*this); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 99 | fCurrent = allocBlock(); |
| 100 | fCurrentUsed = 0; |
| 101 | } |
| 102 | |
| 103 | ~SkPdfAllocator(); |
| 104 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 105 | // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called. |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 106 | SkPdfNativeObject* allocObject(); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 107 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 108 | // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 109 | void* alloc(size_t bytes) { |
| 110 | void* data = malloc(bytes); |
| 111 | fHandles.push(data); |
edisonn@google.com | a5aaa79 | 2013-07-11 12:27:21 +0000 | [diff] [blame] | 112 | fSizeInBytes += bytes; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 113 | return data; |
| 114 | } |
edisonn@google.com | a5aaa79 | 2013-07-11 12:27:21 +0000 | [diff] [blame] | 115 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 116 | // Returns the number of bytes used in this allocator. |
edisonn@google.com | 7b328fd | 2013-07-11 12:53:06 +0000 | [diff] [blame] | 117 | size_t bytesUsed() const { |
edisonn@google.com | a5aaa79 | 2013-07-11 12:27:21 +0000 | [diff] [blame] | 118 | return fSizeInBytes; |
| 119 | } |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 120 | |
| 121 | private: |
| 122 | SkTDArray<SkPdfNativeObject*> fHistory; |
| 123 | SkTDArray<void*> fHandles; |
| 124 | SkPdfNativeObject* fCurrent; |
| 125 | int fCurrentUsed; |
| 126 | |
| 127 | SkPdfNativeObject* allocBlock(); |
| 128 | size_t fSizeInBytes; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 129 | }; |
| 130 | |
// Type of a parsed token: either a keyword or an object.
enum SkPdfTokenType {
    kKeyword_TokenType = 0,
    kObject_TokenType  = 1
};
| 136 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 137 | |
| 138 | /** \struct PdfToken |
| 139 | * |
| 140 | * Stores the result of the parsing - a keyword or an object. |
| 141 | * |
| 142 | */ |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 143 | struct PdfToken { |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 144 | const char* fKeyword; |
| 145 | size_t fKeywordLength; |
| 146 | SkPdfNativeObject* fObject; |
| 147 | SkPdfTokenType fType; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 148 | |
| 149 | PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {} |
| 150 | }; |
| 151 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 152 | /** \class SkPdfNativeTokenizer |
| 153 | * |
| 154 | * Responsible to tokenize a stream in small tokens, eityh a keyword or an object. |
| 155 | * A renderer can feed on the tokens and render a pdf. |
| 156 | * |
| 157 | */ |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 158 | class SkPdfNativeTokenizer { |
| 159 | public: |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 160 | SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, |
| 161 | SkPdfAllocator* allocator, SkPdfNativeDoc* doc); |
| 162 | SkPdfNativeTokenizer(const unsigned char* buffer, int len, |
| 163 | SkPdfAllocator* allocator, SkPdfNativeDoc* doc); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 164 | |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 165 | virtual ~SkPdfNativeTokenizer(); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 166 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 167 | // Reads one token. Returns false if there are no more tokens. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 168 | bool readToken(PdfToken* token); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 169 | |
| 170 | // Put back a token to be read in the nextToken read. Only one token is allowed to be put |
| 171 | // back. Must not necesaarely be the last token read. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 172 | void PutBack(PdfToken token); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 173 | |
| 174 | // Reads the inline image that is present in the stream. At this point we just consumed the ID |
| 175 | // token already. |
edisonn@google.com | 78b38b1 | 2013-07-15 18:20:58 +0000 | [diff] [blame] | 176 | SkPdfImageDictionary* readInlineImage(); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 177 | |
| 178 | private: |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 179 | bool readTokenCore(PdfToken* token); |
| 180 | |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 181 | SkPdfNativeDoc* fDoc; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 182 | SkPdfAllocator* fAllocator; |
| 183 | |
edisonn@google.com | 2ccc3af | 2013-07-23 17:43:18 +0000 | [diff] [blame] | 184 | const unsigned char* fUncompressedStreamStart; |
| 185 | const unsigned char* fUncompressedStream; |
| 186 | const unsigned char* fUncompressedStreamEnd; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 187 | |
| 188 | bool fEmpty; |
| 189 | bool fHasPutBack; |
| 190 | PdfToken fPutBack; |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 191 | }; |
| 192 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 193 | const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, |
| 194 | SkPdfNativeObject* token, |
| 195 | SkPdfAllocator* allocator, |
| 196 | SkPdfNativeDoc* doc); |
| 197 | |
edisonn@google.com | cf2cfa1 | 2013-08-21 16:31:37 +0000 | [diff] [blame] | 198 | #endif // SkPdfNativeTokenizer_DEFINED |