blob: a96a0028d82dc3cbe60a937deeb86f8b85ae0530 [file] [log] [blame]
edisonn@google.com3aac1f92013-07-02 22:42:53 +00001#ifndef EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
2#define EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_
3
edisonn@google.com571c70b2013-07-10 17:09:50 +00004#include "SkTDArray.h"
5#include "SkTDict.h"
6#include <math.h>
7#include <string.h>
8
9class SkPdfMapper;
10class SkPdfDictionary;
edisonn@google.com78b38b12013-07-15 18:20:58 +000011class SkPdfImageDictionary;
edisonn@google.com571c70b2013-07-10 17:09:50 +000012
// ---------------------------------------------------------------------------------------------
// Character classes used while scanning PDF bytes. Every constant is spelled as a raw byte
// value / character literal so it can be compared directly against a byte of the input.
// ---------------------------------------------------------------------------------------------

// White-space characters.
#define kNUL_PdfWhiteSpace  '\x00'  // null
#define kHT_PdfWhiteSpace   '\x09'  // horizontal tab
#define kLF_PdfWhiteSpace   '\x0A'  // line feed
#define kFF_PdfWhiteSpace   '\x0C'  // form feed
#define kCR_PdfWhiteSpace   '\x0D'  // carriage return
#define kSP_PdfWhiteSpace   '\x20'  // space

// Delimiter characters.
#define kOpenedRoundBracket_PdfDelimiter     '('
#define kClosedRoundBracket_PdfDelimiter     ')'
#define kOpenedInequityBracket_PdfDelimiter  '<'
#define kClosedInequityBracket_PdfDelimiter  '>'
#define kOpenedSquareBracket_PdfDelimiter    '['
#define kClosedSquareBracket_PdfDelimiter    ']'
#define kOpenedCurlyBracket_PdfDelimiter     '{'
#define kClosedCurlyBracket_PdfDelimiter     '}'
#define kNamed_PdfDelimiter                  '/'
#define kComment_PdfDelimiter                '%'

// Other special characters.
#define kEscape_PdfSpecial     '\\'
#define kBackspace_PdfSpecial  '\x08'

// TODO(edisonn): what is the fastest way for a given compiler/machine to evaluate these
// expressions? We should evaluate all the options; the answer might even differ from one
// machine to another:
//   1) expand the expression and let the compiler optimize it
//   2) binary search
//   3) linear search in an array
//   4) lookup table (e.g. T type[256]; ... return type[ch];)
//   5) manually build the expression with the least number of operators, e.g. for
//      consecutive chars we can use a binary equal that ignores the last bit
//
// NOTE: these are function-like macros, so `ch` is evaluated more than once and the number of
// evaluations depends on which comparison matches first. Do not pass an expression with side
// effects (for example `*p++`).

// Non-zero when ch is one of the six white-space characters defined above.
#define isPdfWhiteSpace(ch) (((ch) == kNUL_PdfWhiteSpace) || \
                             ((ch) == kHT_PdfWhiteSpace)  || \
                             ((ch) == kLF_PdfWhiteSpace)  || \
                             ((ch) == kFF_PdfWhiteSpace)  || \
                             ((ch) == kCR_PdfWhiteSpace)  || \
                             ((ch) == kSP_PdfWhiteSpace))

// Non-zero when ch is an end-of-line character (line feed or carriage return).
#define isPdfEOL(ch) (((ch) == kLF_PdfWhiteSpace) || \
                      ((ch) == kCR_PdfWhiteSpace))

// Non-zero when ch is one of the ten delimiter characters defined above.
#define isPdfDelimiter(ch) (((ch) == kOpenedRoundBracket_PdfDelimiter)    || \
                            ((ch) == kClosedRoundBracket_PdfDelimiter)    || \
                            ((ch) == kOpenedInequityBracket_PdfDelimiter) || \
                            ((ch) == kClosedInequityBracket_PdfDelimiter) || \
                            ((ch) == kOpenedSquareBracket_PdfDelimiter)   || \
                            ((ch) == kClosedSquareBracket_PdfDelimiter)   || \
                            ((ch) == kOpenedCurlyBracket_PdfDelimiter)    || \
                            ((ch) == kClosedCurlyBracket_PdfDelimiter)    || \
                            ((ch) == kNamed_PdfDelimiter)                 || \
                            ((ch) == kComment_PdfDelimiter))

// Non-zero when ch is either a white-space character or a delimiter.
#define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch) || isPdfDelimiter(ch))

// Non-zero when ch is an ASCII decimal digit.
#define isPdfDigit(ch) (((ch) >= '0') && ((ch) <= '9'))

// Non-zero when ch is a decimal digit, a sign ('+' or '-') or a decimal point.
#define isPdfNumeric(ch) (isPdfDigit(ch) || ((ch) == '+') || ((ch) == '-') || ((ch) == '.'))
edisonn@google.com571c70b2013-07-10 17:09:50 +000064
// Low-level scanning helpers; both are only declared here and defined elsewhere.
// NOTE(review): the meaning of `level` is not visible from this header (possibly a nesting or
// trace depth) - confirm against the implementation.

// Judging by the name, presumably returns a pointer to the first non-white-space byte in
// [buffer, end) - confirm against the implementation.
const unsigned char* skipPdfWhiteSpaces(int level,
                                        const unsigned char* buffer,
                                        const unsigned char* end);

// Judging by the name, presumably returns a pointer just past the token that begins at `start`,
// never reading beyond `end` - confirm against the implementation.
const unsigned char* endOfPdfToken(int level,
                                   const unsigned char* start,
                                   const unsigned char* end);
edisonn@google.com571c70b2013-07-10 17:09:50 +000067
// TODO(edisonn): typedef the real and integer types? It might make the code less readable...
69//typedef double SkPdfReal;
70//typedef int64_t SkPdfInteger;
71
// An allocator only allocates memory, and it releases all of it when the allocator is destroyed.
// This allows us to avoid any garbage collection while we parse or draw a PDF, and to defer it
// to a moment when the user is just looking at the image.
75
edisonn@google.com3aa35552013-08-14 18:26:20 +000076class SkPdfNativeObject;
edisonn@google.com571c70b2013-07-10 17:09:50 +000077
78class SkPdfAllocator {
79#define BUFFER_SIZE 1024
edisonn@google.com3aa35552013-08-14 18:26:20 +000080 SkTDArray<SkPdfNativeObject*> fHistory;
edisonn@google.com571c70b2013-07-10 17:09:50 +000081 SkTDArray<void*> fHandles;
edisonn@google.com3aa35552013-08-14 18:26:20 +000082 SkPdfNativeObject* fCurrent;
edisonn@google.com571c70b2013-07-10 17:09:50 +000083 int fCurrentUsed;
84
edisonn@google.com3aa35552013-08-14 18:26:20 +000085 SkPdfNativeObject* allocBlock();
edisonn@google.coma5aaa792013-07-11 12:27:21 +000086 size_t fSizeInBytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +000087
edisonn@google.com3aac1f92013-07-02 22:42:53 +000088public:
edisonn@google.com571c70b2013-07-10 17:09:50 +000089 SkPdfAllocator() {
edisonn@google.coma5aaa792013-07-11 12:27:21 +000090 fSizeInBytes = sizeof(*this);
edisonn@google.com571c70b2013-07-10 17:09:50 +000091 fCurrent = allocBlock();
92 fCurrentUsed = 0;
93 }
94
95 ~SkPdfAllocator();
96
edisonn@google.com3aa35552013-08-14 18:26:20 +000097 SkPdfNativeObject* allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +000098
99 // TODO(edisonn): free this memory in destructor, track the usage?
100 void* alloc(size_t bytes) {
101 void* data = malloc(bytes);
102 fHandles.push(data);
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000103 fSizeInBytes += bytes;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000104 return data;
105 }
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000106
edisonn@google.com7b328fd2013-07-11 12:53:06 +0000107 size_t bytesUsed() const {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000108 return fSizeInBytes;
109 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000110};
111
edisonn@google.com3aa35552013-08-14 18:26:20 +0000112class SkPdfNativeDoc;
113const unsigned char* nextObject(int level, const unsigned char* start, const unsigned char* end, SkPdfNativeObject* token, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000114
// Discriminator stored in PdfToken::fType. The enumerator values are written out explicitly;
// they are the same values the compiler would assign implicitly.
enum SkPdfTokenType {
    kKeyword_TokenType = 0,
    kObject_TokenType  = 1
};
119
120struct PdfToken {
121 const char* fKeyword;
122 size_t fKeywordLength;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000123 SkPdfNativeObject* fObject;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000124 SkPdfTokenType fType;
125
126 PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {}
127};
128
129class SkPdfNativeTokenizer {
130public:
edisonn@google.com3aa35552013-08-14 18:26:20 +0000131 SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
132 SkPdfNativeTokenizer(const unsigned char* buffer, int len, const SkPdfMapper* mapper, SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000133
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000134 virtual ~SkPdfNativeTokenizer();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000135
136 bool readToken(PdfToken* token);
137 bool readTokenCore(PdfToken* token);
138 void PutBack(PdfToken token);
edisonn@google.com78b38b12013-07-15 18:20:58 +0000139 SkPdfImageDictionary* readInlineImage();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000140
141private:
edisonn@google.com3aa35552013-08-14 18:26:20 +0000142 SkPdfNativeDoc* fDoc;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000143 const SkPdfMapper* fMapper;
144 SkPdfAllocator* fAllocator;
145
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000146 const unsigned char* fUncompressedStreamStart;
147 const unsigned char* fUncompressedStream;
148 const unsigned char* fUncompressedStreamEnd;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000149
150 bool fEmpty;
151 bool fHasPutBack;
152 PdfToken fPutBack;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000153};
154
155#endif // EXPERIMENTAL_PDFVIEWER_PDFPARSER_NATIVE_SKPDFNATIVETOKENIZER_H_