blob: a67a3089bd8967fa242a0ee56547e1f85a66f312 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
2#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
3
edisonn@google.com571c70b2013-07-10 17:09:50 +00004#include "SkTDArray.h"
5#include "SkTDict.h"
6#include <math.h>
7#include <string.h>
8
edisonn@google.com571c70b2013-07-10 17:09:50 +00009class SkPdfDictionary;
edisonn@google.com78b38b12013-07-15 18:20:58 +000010class SkPdfImageDictionary;
edisonn@google.com571c70b2013-07-10 17:09:50 +000011
// White Spaces
#define kNUL_PdfWhiteSpace '\x00'
#define kHT_PdfWhiteSpace '\x09'
#define kLF_PdfWhiteSpace '\x0A'
#define kFF_PdfWhiteSpace '\x0C'
#define kCR_PdfWhiteSpace '\x0D'
#define kSP_PdfWhiteSpace '\x20'

// PdfDelimiters
#define kOpenedRoundBracket_PdfDelimiter '('
#define kClosedRoundBracket_PdfDelimiter ')'
#define kOpenedInequityBracket_PdfDelimiter '<'
#define kClosedInequityBracket_PdfDelimiter '>'
#define kOpenedSquareBracket_PdfDelimiter '['
#define kClosedSquareBracket_PdfDelimiter ']'
#define kOpenedCurlyBracket_PdfDelimiter '{'
#define kClosedCurlyBracket_PdfDelimiter '}'
#define kNamed_PdfDelimiter '/'
#define kComment_PdfDelimiter '%'

#define kEscape_PdfSpecial '\\'
#define kBackspace_PdfSpecial '\x08'

// Character-class predicates.
//
// These are inline functions rather than function-like macros so that the argument is
// evaluated exactly once: a call such as isPdfDigit(*p++) advances p by one, whereas a macro
// that repeats (ch) in every comparison would advance it once per evaluated comparison.
// The argument is taken as int, so char and unsigned char values are promoted exactly as they
// would be in a direct comparison against the character constants above.
//
// TODO(edisonn): what is the fastest way for a compiler/machine type to evaluate these
// expressions? We should evaluate all options; it might even differ from one machine to another:
// 1) expand expression, let compiler optimize it
// 2) binary search
// 3) linear search in array
// 4) vector (e.g. T type[256] .. return type[ch] ...
// 5) manually build the expression with the least number of operators, e.g. for consecutive
//    chars, we can use a binary equal ignoring the last bit

// Returns true if ch is one of the white-space characters defined above (NUL, HT, LF, FF, CR, SP).
static inline bool isPdfWhiteSpace(int ch) {
    switch (ch) {
        case kNUL_PdfWhiteSpace:
        case kHT_PdfWhiteSpace:
        case kLF_PdfWhiteSpace:
        case kFF_PdfWhiteSpace:
        case kCR_PdfWhiteSpace:
        case kSP_PdfWhiteSpace:
            return true;
        default:
            return false;
    }
}

// Returns true if ch is a line feed or a carriage return.
static inline bool isPdfEOL(int ch) {
    return ch == kLF_PdfWhiteSpace || ch == kCR_PdfWhiteSpace;
}

// Returns true if ch is one of the delimiter characters defined above: ( ) < > [ ] { } / %
static inline bool isPdfDelimiter(int ch) {
    switch (ch) {
        case kOpenedRoundBracket_PdfDelimiter:
        case kClosedRoundBracket_PdfDelimiter:
        case kOpenedInequityBracket_PdfDelimiter:
        case kClosedInequityBracket_PdfDelimiter:
        case kOpenedSquareBracket_PdfDelimiter:
        case kClosedSquareBracket_PdfDelimiter:
        case kOpenedCurlyBracket_PdfDelimiter:
        case kClosedCurlyBracket_PdfDelimiter:
        case kNamed_PdfDelimiter:
        case kComment_PdfDelimiter:
            return true;
        default:
            return false;
    }
}

// Returns true if ch is either a white space or a delimiter.
static inline bool isPdfWhiteSpaceOrPdfDelimiter(int ch) {
    return isPdfWhiteSpace(ch) || isPdfDelimiter(ch);
}

// Returns true if ch is an ASCII decimal digit, '0' through '9'.
static inline bool isPdfDigit(int ch) {
    return ch >= '0' && ch <= '9';
}

// Returns true if ch is a decimal digit, a sign ('+' or '-') or a decimal point ('.').
static inline bool isPdfNumeric(int ch) {
    return isPdfDigit(ch) || ch == '+' || ch == '-' || ch == '.';
}

edisonn@google.com571c70b2013-07-10 17:09:50 +000063
// Scanning helpers over the byte range [buffer/start, end). Both are defined out of line.
// NOTE(review): judging by the names, skipPdfWhiteSpaces presumably returns the position of the
// first non-white-space byte and endOfPdfToken the position just past the token beginning at
// start; 'level' looks like a nesting depth - confirm all of this against the implementation.
const unsigned char* skipPdfWhiteSpaces(int level,
                                        const unsigned char* buffer,
                                        const unsigned char* end);
const unsigned char* endOfPdfToken(int level,
                                   const unsigned char* start,
                                   const unsigned char* end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000066
// TODO(edisonn): typedef real and integer types? It might make the code less readable...
//typedef double SkPdfReal;
//typedef int64_t SkPdfInteger;
70
// An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
// This allows us not to do any garbage collection while we parse or draw a pdf, and to defer it
// while the user is looking at the image.
74
edisonn@google.com3aa35552013-08-14 18:26:20 +000075class SkPdfNativeObject;
edisonn@google.com571c70b2013-07-10 17:09:50 +000076
77class SkPdfAllocator {
78#define BUFFER_SIZE 1024
edisonn@google.com3aa35552013-08-14 18:26:20 +000079 SkTDArray<SkPdfNativeObject*> fHistory;
edisonn@google.com571c70b2013-07-10 17:09:50 +000080 SkTDArray<void*> fHandles;
edisonn@google.com3aa35552013-08-14 18:26:20 +000081 SkPdfNativeObject* fCurrent;
edisonn@google.com571c70b2013-07-10 17:09:50 +000082 int fCurrentUsed;
83
edisonn@google.com3aa35552013-08-14 18:26:20 +000084 SkPdfNativeObject* allocBlock();
edisonn@google.coma5aaa792013-07-11 12:27:21 +000085 size_t fSizeInBytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +000086
edisonn@google.com3aac1f92013-07-02 22:42:53 +000087public:
edisonn@google.com571c70b2013-07-10 17:09:50 +000088 SkPdfAllocator() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +000089 fSizeInBytes = sizeof(*this);
edisonn@google.com571c70b2013-07-10 17:09:50 +000090 fCurrent = allocBlock();
91 fCurrentUsed = 0;
92 }
93
94 ~SkPdfAllocator();
95
edisonn@google.com3aa35552013-08-14 18:26:20 +000096 SkPdfNativeObject* allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +000097
98 // TODO(edisonn): free this memory in destructor, track the usage?
99 void* alloc(size_t bytes) {
100 void* data = malloc(bytes);
101 fHandles.push(data);
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000102 fSizeInBytes += bytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000103 return data;
104 }
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000105
edisonn@google.com7b328fd2013-07-11 12:53:06 +0000106 size_t bytesUsed() const {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000107 return fSizeInBytes;
108 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000109};
110
edisonn@google.com3aa35552013-08-14 18:26:20 +0000111class SkPdfNativeDoc;
112const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000113
// Kind of a PdfToken.
// NOTE(review): presumably kKeyword_TokenType means fKeyword/fKeywordLength are meaningful and
// kObject_TokenType means fObject is - confirm against the tokenizer implementation.
enum SkPdfTokenType {
    kKeyword_TokenType,
    kObject_TokenType,
};

class SkPdfNativeObject;  // PdfToken only stores a pointer to it.

// A single token as exchanged with SkPdfNativeTokenizer.
struct PdfToken {
    const char* fKeyword;
    size_t fKeywordLength;
    SkPdfNativeObject* fObject;
    SkPdfTokenType fType;

    // Builds an empty token: no keyword, no object. fType is initialized too (to
    // kKeyword_TokenType), so reading it from a fresh token is well defined.
    PdfToken()
        : fKeyword(NULL)
        , fKeywordLength(0)
        , fObject(NULL)
        , fType(kKeyword_TokenType) {}
};
127
128class SkPdfNativeTokenizer {
129public:
edisonn@google.com33f11b62013-08-14 21:35:27 +0000130 SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
131 SkPdfNativeTokenizer(const unsigned char* buffer, int len, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000132
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000133 virtual ~SkPdfNativeTokenizer();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134
135 bool readToken(PdfToken* token);
136 bool readTokenCore(PdfToken* token);
137 void PutBack(PdfToken token);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000138 SkPdfImageDictionary* readInlineImage();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000139
140private:
edisonn@google.com3aa35552013-08-14 18:26:20 +0000141 SkPdfNativeDoc* fDoc;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000142 SkPdfAllocator* fAllocator;
143
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000144 const unsigned char* fUncompressedStreamStart;
145 const unsigned char* fUncompressedStream;
146 const unsigned char* fUncompressedStreamEnd;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000147
148 bool fEmpty;
149 bool fHasPutBack;
150 PdfToken fPutBack;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000151};
152
153#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_