/*
 * Copyright 2013 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkPdfNativeTokenizer_DEFINED
#define SkPdfNativeTokenizer_DEFINED

#include <math.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "SkPdfConfig.h"
#include "SkTDArray.h"
#include "SkTDict.h"

// All the character constants defined below are taken from the PDF 1.4 Spec.

// Forward declarations: this header only refers to these types through pointers.
class SkPdfDictionary;
class SkPdfImageDictionary;
class SkPdfNativeDoc;
class SkPdfNativeObject;


// White space characters.
#define kNUL_PdfWhiteSpace '\x00'
#define kHT_PdfWhiteSpace '\x09'
#define kLF_PdfWhiteSpace '\x0A'
#define kFF_PdfWhiteSpace '\x0C'
#define kCR_PdfWhiteSpace '\x0D'
#define kSP_PdfWhiteSpace '\x20'

// Delimiter characters.
#define kOpenedRoundBracket_PdfDelimiter '('
#define kClosedRoundBracket_PdfDelimiter ')'
#define kOpenedInequityBracket_PdfDelimiter '<'
#define kClosedInequityBracket_PdfDelimiter '>'
#define kOpenedSquareBracket_PdfDelimiter '['
#define kClosedSquareBracket_PdfDelimiter ']'
#define kOpenedCurlyBracket_PdfDelimiter '{'
#define kClosedCurlyBracket_PdfDelimiter '}'
#define kNamed_PdfDelimiter '/'
#define kComment_PdfDelimiter '%'

// Special characters.
#define kEscape_PdfSpecial '\\'
#define kBackspace_PdfSpecial '\x08'

// TODO(edisonn): what is the fastest way for the compiler/machine to evaluate these expressions?
// We should evaluate all options; the answer might even differ from one machine to another.
//   1) expand the expression, let the compiler optimize it
//   2) binary search
//   3) linear search in an array
//   4) lookup table (e.g. T type[256] ... return type[ch] ...)
//   5) manually build the expression with the least number of operators, e.g. for consecutive
//      chars, we can use a binary equal ignoring the last bit

// NOTE: every predicate below is a macro that mentions its argument more than once, so the
// argument may be evaluated several times. Do not pass expressions with side effects
// (e.g. *p++).

// True if ch is one of the six white space characters defined above.
#define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \
                             ((ch)==kHT_PdfWhiteSpace)|| \
                             ((ch)==kLF_PdfWhiteSpace)|| \
                             ((ch)==kFF_PdfWhiteSpace)|| \
                             ((ch)==kCR_PdfWhiteSpace)|| \
                             ((ch)==kSP_PdfWhiteSpace))

// True if ch is a line feed or a carriage return.
#define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))

// True if ch is one of the ten delimiter characters defined above.
#define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
                            ((ch)==kClosedRoundBracket_PdfDelimiter)||\
                            ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
                            ((ch)==kClosedInequityBracket_PdfDelimiter)||\
                            ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
                            ((ch)==kClosedSquareBracket_PdfDelimiter)||\
                            ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
                            ((ch)==kNamed_PdfDelimiter)||\
                            ((ch)==kComment_PdfDelimiter))

// True if ch is either a white space or a delimiter.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))

// True if ch is an ASCII decimal digit.
#define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')

// True if ch is a decimal digit, a sign ('+' or '-') or a decimal point.
#define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')

// Both functions are only declared here; they are defined elsewhere.
//
// NOTE(review): judging by the names, skipPdfWhiteSpaces() presumably returns a pointer to the
// first non white space byte in [buffer, end), and endOfPdfToken() a pointer just past the
// token that begins at start, never going beyond end -- confirm against the definitions.
const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end);
const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end);

// NOTE(review): not referenced in this header; presumably the number of objects per block
// handed out by SkPdfAllocator -- confirm in the implementation file.
#define BUFFER_SIZE 1024

edisonn@google.com2af2ad92013-10-11 16:17:44 +000088/** \class SkPdfAllocator
89 *
90 * An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
91 * This strategy would allow us not to do any garbage collection while we parse and/or render
92 * a pdf.
93 *
94 */
95class SkPdfAllocator {
edisonn@google.com3aac1f92013-07-02 22:42:53 +000096public:
edisonn@google.com571c70b2013-07-10 17:09:50 +000097 SkPdfAllocator() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +000098 fSizeInBytes = sizeof(*this);
edisonn@google.com571c70b2013-07-10 17:09:50 +000099 fCurrent = allocBlock();
100 fCurrentUsed = 0;
101 }
102
103 ~SkPdfAllocator();
104
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000105 // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called.
edisonn@google.com3aa35552013-08-14 18:26:20 +0000106 SkPdfNativeObject* allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000107
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000108 // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000109 void* alloc(size_t bytes) {
110 void* data = malloc(bytes);
111 fHandles.push(data);
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000112 fSizeInBytes += bytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000113 return data;
114 }
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000115
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000116 // Returns the number of bytes used in this allocator.
edisonn@google.com7b328fd2013-07-11 12:53:06 +0000117 size_t bytesUsed() const {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000118 return fSizeInBytes;
119 }
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000120
121private:
122 SkTDArray<SkPdfNativeObject*> fHistory;
123 SkTDArray<void*> fHandles;
124 SkPdfNativeObject* fCurrent;
125 int fCurrentUsed;
126
127 SkPdfNativeObject* allocBlock();
128 size_t fSizeInBytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000129};
130
// Type of a parsed token: tells which of the PdfToken payloads (keyword or object) is
// meaningful.
enum SkPdfTokenType {
    kKeyword_TokenType,
    kObject_TokenType
};


138/** \struct PdfToken
139 *
140 * Stores the result of the parsing - a keyword or an object.
141 *
142 */
edisonn@google.com571c70b2013-07-10 17:09:50 +0000143struct PdfToken {
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000144 const char* fKeyword;
145 size_t fKeywordLength;
146 SkPdfNativeObject* fObject;
147 SkPdfTokenType fType;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000148
149 PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {}
150};
151
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000152/** \class SkPdfNativeTokenizer
153 *
154 * Responsible to tokenize a stream in small tokens, eityh a keyword or an object.
155 * A renderer can feed on the tokens and render a pdf.
156 *
157 */
edisonn@google.com571c70b2013-07-10 17:09:50 +0000158class SkPdfNativeTokenizer {
159public:
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000160 SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
161 SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
162 SkPdfNativeTokenizer(const unsigned char* buffer, int len,
163 SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000164
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000165 virtual ~SkPdfNativeTokenizer();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000166
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000167 // Reads one token. Returns false if there are no more tokens.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000168 bool readToken(PdfToken* token);
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000169
170 // Put back a token to be read in the nextToken read. Only one token is allowed to be put
171 // back. Must not necesaarely be the last token read.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000172 void PutBack(PdfToken token);
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000173
174 // Reads the inline image that is present in the stream. At this point we just consumed the ID
175 // token already.
edisonn@google.com78b38b12013-07-15 18:20:58 +0000176 SkPdfImageDictionary* readInlineImage();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000177
178private:
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000179 bool readTokenCore(PdfToken* token);
180
edisonn@google.com3aa35552013-08-14 18:26:20 +0000181 SkPdfNativeDoc* fDoc;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000182 SkPdfAllocator* fAllocator;
183
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000184 const unsigned char* fUncompressedStreamStart;
185 const unsigned char* fUncompressedStream;
186 const unsigned char* fUncompressedStreamEnd;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000187
188 bool fEmpty;
189 bool fHasPutBack;
190 PdfToken fPutBack;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000191};
192
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000193const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
194 SkPdfNativeObject* token,
195 SkPdfAllocator* allocator,
196 SkPdfNativeDoc* doc);
197
#endif  // SkPdfNativeTokenizer_DEFINED