blob: f278ea1a2e6e911af04146fc93dee7bb928cfd56 [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
edisonn@google.com3aa35552013-08-14 18:26:20 +00008#include "SkPdfNativeDoc.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00009#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +000010#include "SkPdfNativeObject.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000011
edisonn@google.com571c70b2013-07-10 17:09:50 +000012#include <stdio.h>
13#include <string.h>
14#include <sys/types.h>
15#include <sys/stat.h>
edisonn@google.com3aac1f92013-07-02 22:42:53 +000016
edisonn@google.com33f11b62013-08-14 21:35:27 +000017// TODO(edisonn): for some reason on mac these files are found here, but are found from headers
18//#include "SkPdfFileTrailerDictionary_autogen.h"
19//#include "SkPdfCatalogDictionary_autogen.h"
20//#include "SkPdfPageObjectDictionary_autogen.h"
21//#include "SkPdfPageTreeNodeDictionary_autogen.h"
22#include "SkPdfHeaders_autogen.h"
23
edisonn@google.com571c70b2013-07-10 17:09:50 +000024#include "SkPdfMapper_autogen.h"
25
edisonn@google.com147adb12013-07-24 15:56:19 +000026#include "SkStream.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +000027
28
// Returns the size in bytes of the file at |filename|, or -1 when the size
// cannot be determined: NULL path, stat() failure, or a size that does not fit
// in a long.
static long getFileSize(const char* filename)
{
    if (filename == NULL) {
        return -1;
    }

    struct stat stat_buf;
    if (stat(filename, &stat_buf) != 0) {
        return -1;
    }

    // st_size is an off_t, which may be wider than long. Round-trip the value so
    // that a truncated size is reported as an error instead of a bogus length.
    const long size = (long)stat_buf.st_size;
    if (size < 0 || (off_t)size != stat_buf.st_size) {
        return -1;
    }
    return size;
}
35
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000036static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000037 while (current > start && !isPdfEOL(*(current - 1))) {
38 current--;
39 }
40 return current;
41}
42
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000043static const unsigned char* previousLineHome(const unsigned char* start, const unsigned char* current) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000044 if (current > start && isPdfEOL(*(current - 1))) {
45 current--;
46 }
47
48 // allows CR+LF, LF+CR but not two CR+CR or LF+LF
49 if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) {
50 current--;
51 }
52
53 while (current > start && !isPdfEOL(*(current - 1))) {
54 current--;
55 }
56
57 return current;
58}
59
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000060static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000061 while (current < end && !isPdfEOL(*current)) {
62 current++;
63 }
64 current++;
65 if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
66 current++;
67 }
68 return current;
69}
70
edisonn@google.com3aa35552013-08-14 18:26:20 +000071SkPdfNativeDoc* gDoc = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +000072
// TODO(edisonn): NYI
// TODO(edisonn): 3 constructors: from URL, from stream, from file ...
// TODO(edisonn): write one that accepts errors in the file and ignores/fixes them
// TODO(edisonn): testing:
// 1) run on a lot of files
// 2) recoverable corrupt file: remove endobj, endstream, remove other keywords, use other white spaces, insert comments randomly, ...
// 3) irrecoverable corrupt file
edisonn@google.com147adb12013-07-24 15:56:19 +000080
edisonn@google.com3aa35552013-08-14 18:26:20 +000081SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
edisonn@google.com147adb12013-07-24 15:56:19 +000082 : fAllocator(new SkPdfAllocator())
83 , fFileContent(NULL)
84 , fContentLength(0)
85 , fRootCatalogRef(NULL)
86 , fRootCatalog(NULL) {
87 size_t size = stream->getLength();
88 void* ptr = sk_malloc_throw(size);
89 stream->read(ptr, size);
90
91 init(ptr, size);
92}
93
edisonn@google.com3aa35552013-08-14 18:26:20 +000094SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
edisonn@google.com432640a2013-07-10 22:53:40 +000095 : fAllocator(new SkPdfAllocator())
edisonn@google.com147adb12013-07-24 15:56:19 +000096 , fFileContent(NULL)
97 , fContentLength(0)
edisonn@google.com432640a2013-07-10 22:53:40 +000098 , fRootCatalogRef(NULL)
99 , fRootCatalog(NULL) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000100 gDoc = this;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000101 FILE* file = fopen(path, "r");
edisonn@google.come57c62d2013-08-07 18:04:15 +0000102 // TODO(edisonn): put this in a function that can return NULL
103 if (file) {
104 size_t size = getFileSize(path);
105 void* content = sk_malloc_throw(size);
106 bool ok = (0 != fread(content, size, 1, file));
107 fclose(file);
108 if (!ok) {
109 sk_free(content);
110 // TODO(edisonn): report read error
111 // TODO(edisonn): not nice to return like this from constructor, create a static
112 // function that can report NULL for failures.
113 return; // Doc will have 0 pages
114 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115
edisonn@google.come57c62d2013-08-07 18:04:15 +0000116 init(content, size);
edisonn@google.com620edc52013-07-18 13:03:03 +0000117 }
edisonn@google.com147adb12013-07-24 15:56:19 +0000118}
119
edisonn@google.com3aa35552013-08-14 18:26:20 +0000120void SkPdfNativeDoc::init(const void* bytes, size_t length) {
edisonn@google.com147adb12013-07-24 15:56:19 +0000121 fFileContent = (const unsigned char*)bytes;
122 fContentLength = length;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000123 const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
124 const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
125 const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126
127 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
128 // TODO(edisonn): report/issue
129 }
130
131 long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
132
133 bool storeCatalog = true;
134 while (xrefByteOffset >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000135 const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
edisonn@google.com24cdf132013-07-30 16:06:12 +0000136 xrefByteOffset = -1;
137 if (trailerStart < xrefstartKeywordLine) {
138 readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
139 storeCatalog = false;
140 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000141 }
142
143 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
144 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
145 // load catalog
edisonn@google.com571c70b2013-07-10 17:09:50 +0000146
edisonn@google.com432640a2013-07-10 22:53:40 +0000147 if (fRootCatalogRef) {
148 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
edisonn@google.com8bad7372013-07-10 23:36:56 +0000149 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) {
150 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
151 if (tree && tree->isDictionary() && tree->valid()) {
152 fillPages(tree);
153 }
154 }
edisonn@google.com432640a2013-07-10 22:53:40 +0000155 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000156
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000157 // TODO(edisonn): clean up this doc, or better, let the caller call again and build a new doc
158 // caller should be a static function.
159 if (pages() == 0) {
160 loadWithoutXRef();
161 }
162
edisonn@google.com8bad7372013-07-10 23:36:56 +0000163 // TODO(edisonn): corrupted pdf, read it from beginning and rebuild (xref, trailer, or just reall all objects)
164 // 0 pages
165
edisonn@google.com571c70b2013-07-10 17:09:50 +0000166 // now actually read all objects if we want, or do it lazyly
167 // and resolve references?... or not ...
168}
169
edisonn@google.com3aa35552013-08-14 18:26:20 +0000170void SkPdfNativeDoc::loadWithoutXRef() {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000171 const unsigned char* current = fFileContent;
172 const unsigned char* end = fFileContent + fContentLength;
173
174 // TODO(edisonn): read pdf version
175 current = ignoreLine(current, end);
176
177 current = skipPdfWhiteSpaces(0, current, end);
178 while (current < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000179 SkPdfNativeObject token;
edisonn@google.combca421b2013-09-05 20:00:21 +0000180 current = nextObject(0, current, end, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000181 if (token.isInteger()) {
182 int id = (int)token.intValue();
183
184 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000185 current = nextObject(0, current, end, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000186 // int generation = (int)token.intValue(); // TODO(edisonn): ignored for now
187
188 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000189 current = nextObject(0, current, end, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000190 // TODO(edisonn): must be obj, return error if not? ignore ?
191 if (!token.isKeyword("obj")) {
192 continue;
193 }
194
195 while (fObjects.count() < id + 1) {
196 reset(fObjects.append());
197 }
198
199 fObjects[id].fOffset = current - fFileContent;
200
edisonn@google.com3aa35552013-08-14 18:26:20 +0000201 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000202 current = nextObject(0, current, end, obj, fAllocator, this PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000203
204 fObjects[id].fResolvedReference = obj;
205 fObjects[id].fObj = obj;
206
207 // set objects
208 } else if (token.isKeyword("trailer")) {
209 long dummy;
210 current = readTrailer(current, end, true, &dummy, true);
211 } else if (token.isKeyword("startxref")) {
212 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000213 current = nextObject(0, current, end, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent)); // ignore
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000214 }
215
216 current = skipPdfWhiteSpaces(0, current, end);
217 }
218
edisonn@google.com4f898b72013-08-07 21:11:57 +0000219 // TODO(edisonn): hack, detect root catalog - we need to implement liniarized support, and remove this hack.
220 if (!fRootCatalogRef) {
221 for (unsigned int i = 0 ; i < objects(); i++) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000222 SkPdfNativeObject* obj = object(i);
223 SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
edisonn@google.com4f898b72013-08-07 21:11:57 +0000224 if (root && root->isReference()) {
225 fRootCatalogRef = root;
226 }
227 }
228 }
229
230
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000231 if (fRootCatalogRef) {
232 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
233 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) {
234 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
235 if (tree && tree->isDictionary() && tree->valid()) {
236 fillPages(tree);
237 }
238 }
239 }
240
edisonn@google.com4f898b72013-08-07 21:11:57 +0000241
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000242}
243
edisonn@google.com571c70b2013-07-10 17:09:50 +0000244// TODO(edisonn): NYI
edisonn@google.com3aa35552013-08-14 18:26:20 +0000245SkPdfNativeDoc::~SkPdfNativeDoc() {
edisonn@google.com147adb12013-07-24 15:56:19 +0000246 sk_free((void*)fFileContent);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000247 delete fAllocator;
248}
249
edisonn@google.com3aa35552013-08-14 18:26:20 +0000250const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd) {
251 SkPdfNativeObject xref;
edisonn@google.combca421b2013-09-05 20:00:21 +0000252 const unsigned char* current = nextObject(0, xrefStart, trailerEnd, &xref, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com2273f9b2013-08-06 21:48:44 +0000253
254 if (!xref.isKeyword("xref")) {
255 return trailerEnd;
256 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000257
edisonn@google.com3aa35552013-08-14 18:26:20 +0000258 SkPdfNativeObject token;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000259 while (current < trailerEnd) {
260 token.reset();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000261 const unsigned char* previous = current;
edisonn@google.combca421b2013-09-05 20:00:21 +0000262 current = nextObject(0, current, trailerEnd, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000263 if (!token.isInteger()) {
264 return previous;
265 }
266
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000267 int startId = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000268 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000269 current = nextObject(0, current, trailerEnd, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000270
271 if (!token.isInteger()) {
272 // TODO(edisonn): report/warning
273 return current;
274 }
275
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000276 int entries = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000277
278 for (int i = 0; i < entries; i++) {
279 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000280 current = nextObject(0, current, trailerEnd, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000281 if (!token.isInteger()) {
282 // TODO(edisonn): report/warning
283 return current;
284 }
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000285 int offset = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000286
287 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000288 current = nextObject(0, current, trailerEnd, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000289 if (!token.isInteger()) {
290 // TODO(edisonn): report/warning
291 return current;
292 }
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000293 int generation = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000294
295 token.reset();
edisonn@google.combca421b2013-09-05 20:00:21 +0000296 current = nextObject(0, current, trailerEnd, &token, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.come878e722013-07-29 19:10:58 +0000297 if (!token.isKeyword() || token.lenstr() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000298 // TODO(edisonn): report/warning
299 return current;
300 }
301
302 addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
303 }
304 }
305 // TODO(edisonn): it should never get here? there is no trailer?
306 return current;
307}
308
edisonn@google.com3aa35552013-08-14 18:26:20 +0000309const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000310 *prev = -1;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000311
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000312 const unsigned char* current = trailerStart;
313 if (!skipKeyword) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000314 SkPdfNativeObject trailerKeyword;
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000315 // TODO(edisonn): use null allocator, and let it just fail if memory
316 // needs allocated (but no crash)!
edisonn@google.combca421b2013-09-05 20:00:21 +0000317 current = nextObject(0, current, trailerEnd, &trailerKeyword, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000318
319 if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
320 strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
321 // TODO(edisonn): report warning, rebuild trailer from objects.
322 return current;
323 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000324 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000325
edisonn@google.com3aa35552013-08-14 18:26:20 +0000326 SkPdfNativeObject token;
edisonn@google.combca421b2013-09-05 20:00:21 +0000327 current = nextObject(0, current, trailerEnd, &token, fAllocator, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com432640a2013-07-10 22:53:40 +0000328 if (!token.isDictionary()) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000329 return current;
edisonn@google.com432640a2013-07-10 22:53:40 +0000330 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000331 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
edisonn@google.com432640a2013-07-10 22:53:40 +0000332 if (!trailer->valid()) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000333 return current;
edisonn@google.com432640a2013-07-10 22:53:40 +0000334 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000335
336 if (storeCatalog) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000337 SkPdfNativeObject* ref = trailer->Root(NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000338 if (ref == NULL || !ref->isReference()) {
339 // TODO(edisonn): oops, we have to fix the corrup pdf file
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000340 return current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000341 }
342 fRootCatalogRef = ref;
343 }
344
345 if (trailer->has_Prev()) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000346 *prev = (long)trailer->Prev(NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000347 }
348
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000349 return current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000350}
351
edisonn@google.com3aa35552013-08-14 18:26:20 +0000352void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000353 // TODO(edisonn): security here
354 while (fObjects.count() < id + 1) {
355 reset(fObjects.append());
356 }
357
358 fObjects[id].fOffset = offset;
359 fObjects[id].fObj = NULL;
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000360 fObjects[id].fResolvedReference = NULL;
edisonn@google.comf68aed32013-08-22 15:37:21 +0000361 fObjects[id].fIsReferenceResolved = false;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000362}
363
edisonn@google.com3aa35552013-08-14 18:26:20 +0000364SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000365 long startOffset = fObjects[id].fOffset;
366 //long endOffset = fObjects[id].fOffsetEnd;
367 // TODO(edisonn): use hinted endOffset
368 // TODO(edisonn): current implementation will result in a lot of memory usage
369 // to decrease memory usage, we wither need to be smart and know where objects end, and we will
370 // alocate only the chancks needed, or the tokenizer will not make copies, but then it needs to
371 // cache the results so it does not go twice on the same buffer
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000372 const unsigned char* current = fFileContent + startOffset;
373 const unsigned char* end = fFileContent + fContentLength;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000374
edisonn@google.com33f11b62013-08-14 21:35:27 +0000375 SkPdfNativeTokenizer tokenizer(current, end - current, fAllocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000376
edisonn@google.com3aa35552013-08-14 18:26:20 +0000377 SkPdfNativeObject idObj;
378 SkPdfNativeObject generationObj;
379 SkPdfNativeObject objKeyword;
380 SkPdfNativeObject* dict = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000381
edisonn@google.combca421b2013-09-05 20:00:21 +0000382 current = nextObject(0, current, end, &idObj, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000383 if (current >= end) {
384 // TODO(edisonn): report warning/error
385 return NULL;
386 }
387
edisonn@google.combca421b2013-09-05 20:00:21 +0000388 current = nextObject(0, current, end, &generationObj, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000389 if (current >= end) {
390 // TODO(edisonn): report warning/error
391 return NULL;
392 }
393
edisonn@google.combca421b2013-09-05 20:00:21 +0000394 current = nextObject(0, current, end, &objKeyword, NULL, NULL PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000395 if (current >= end) {
396 // TODO(edisonn): report warning/error
397 return NULL;
398 }
399
400 if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue()/* || generation != generationObj.intValue()*/) {
401 // TODO(edisonn): report warning/error
402 }
403
404 if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
405 // TODO(edisonn): report warning/error
406 }
407
edisonn@google.combca421b2013-09-05 20:00:21 +0000408 current = nextObject(1, current, end, dict, fAllocator, this PUT_TRACK_STREAM_ARGS_EXPL2(0, fFileContent));
edisonn@google.com571c70b2013-07-10 17:09:50 +0000409
410 // TODO(edisonn): report warning/error - verify last token is endobj
411
412 return dict;
413}
414
edisonn@google.com3aa35552013-08-14 18:26:20 +0000415void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000416 SkPdfArray* kids = tree->Kids(this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000417 if (kids == NULL) {
418 *fPages.append() = (SkPdfPageObjectDictionary*)tree;
419 return;
420 }
421
422 int cnt = kids->size();
423 for (int i = 0; i < cnt; i++) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000424 SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
425 if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000426 *fPages.append() = (SkPdfPageObjectDictionary*)obj;
427 } else {
428 // TODO(edisonn): verify that it is a page tree indeed
429 fillPages((SkPdfPageTreeNodeDictionary*)obj);
430 }
431 }
432}
433
edisonn@google.com3aa35552013-08-14 18:26:20 +0000434int SkPdfNativeDoc::pages() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000435 return fPages.count();
436}
437
edisonn@google.com3aa35552013-08-14 18:26:20 +0000438SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
edisonn@google.com88fc03d2013-07-30 13:34:10 +0000439 SkASSERT(page >= 0 && page < fPages.count());
440 return fPages[page];
441}
442
443
edisonn@google.com3aa35552013-08-14 18:26:20 +0000444SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
edisonn@google.com88fc03d2013-07-30 13:34:10 +0000445 SkASSERT(page >= 0 && page < fPages.count());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000446 return fPages[page]->Resources(this);
447}
448
// TODO(edisonn): Partially implemented. Move the logic directly into the code generator for inheritable and default values?
edisonn@google.com3aa35552013-08-14 18:26:20 +0000450SkRect SkPdfNativeDoc::MediaBox(int page) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000451 SkPdfPageObjectDictionary* current = fPages[page];
452 while (!current->has_MediaBox() && current->has_Parent()) {
453 current = (SkPdfPageObjectDictionary*)current->Parent(this);
454 }
455 if (current) {
456 return current->MediaBox(this);
457 }
458 return SkRect::MakeEmpty();
459}
460
461// TODO(edisonn): stream or array ... ? for now only array
edisonn@google.com3aa35552013-08-14 18:26:20 +0000462SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfPage(int page,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000463 SkPdfAllocator* allocator) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000464 if (fPages[page]->isContentsAStream(this)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000465 return tokenizerOfStream(fPages[page]->getContentsAsStream(this), allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000466 } else {
467 // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart
468 // so we don't allocate new memory
469 return NULL;
470 }
471}
472
edisonn@google.com3aa35552013-08-14 18:26:20 +0000473SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfStream(SkPdfNativeObject* stream,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000474 SkPdfAllocator* allocator) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000475 if (stream == NULL) {
476 return NULL;
477 }
478
edisonn@google.com33f11b62013-08-14 21:35:27 +0000479 return new SkPdfNativeTokenizer(stream, allocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000480}
481
482// TODO(edisonn): NYI
edisonn@google.com3aa35552013-08-14 18:26:20 +0000483SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfBuffer(const unsigned char* buffer, size_t len,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000484 SkPdfAllocator* allocator) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000485 // warning does not track two calls in the same buffer! the buffer is updated!
486 // make a clean copy if needed!
edisonn@google.com33f11b62013-08-14 21:35:27 +0000487 return new SkPdfNativeTokenizer(buffer, len, allocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000488}
489
edisonn@google.com3aa35552013-08-14 18:26:20 +0000490size_t SkPdfNativeDoc::objects() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000491 return fObjects.count();
492}
493
edisonn@google.com3aa35552013-08-14 18:26:20 +0000494SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000495 SkASSERT(!(i < 0 || i > fObjects.count()));
496
497 if (i < 0 || i > fObjects.count()) {
498 return NULL;
499 }
500
501 if (fObjects[i].fObj == NULL) {
502 // TODO(edisonn): when we read the cross reference sections, store the start of the next object
503 // and fill fOffsetEnd
504 fObjects[i].fObj = readObject(i);
505 }
506
507 return fObjects[i].fObj;
508}
509
edisonn@google.com3aa35552013-08-14 18:26:20 +0000510const SkPdfMapper* SkPdfNativeDoc::mapper() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000511 return fMapper;
512}
513
edisonn@google.com3aa35552013-08-14 18:26:20 +0000514SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
515 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000516 SkPdfNativeObject::makeReal(value, obj PUT_TRACK_PARAMETERS_SRC);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000517 return (SkPdfReal*)obj;
518}
519
edisonn@google.com3aa35552013-08-14 18:26:20 +0000520SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
521 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000522 SkPdfNativeObject::makeInteger(value, obj PUT_TRACK_PARAMETERS_SRC);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000523 return (SkPdfInteger*)obj;
524}
525
edisonn@google.com3aa35552013-08-14 18:26:20 +0000526SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
527 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.combca421b2013-09-05 20:00:21 +0000528 SkPdfNativeObject::makeString(sz, len, obj PUT_TRACK_PARAMETERS_SRC);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000529 return (SkPdfString*)obj;
530}
531
edisonn@google.com3aa35552013-08-14 18:26:20 +0000532SkPdfAllocator* SkPdfNativeDoc::allocator() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000533 return fAllocator;
534}
535
edisonn@google.com571c70b2013-07-10 17:09:50 +0000536// TODO(edisonn): fix infinite loop if ref to itself!
537// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference?
edisonn@google.com3aa35552013-08-14 18:26:20 +0000538SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000539 if (ref && ref->isReference()) {
540 int id = ref->referenceId();
541 // TODO(edisonn): generation/updates not supported now
542 //int gen = ref->referenceGeneration();
543
edisonn@google.com641cce92013-07-30 12:09:14 +0000544 // TODO(edisonn): verify id and gen expected
545 if (id < 0 || id >= fObjects.count()) {
546 // TODO(edisonn): report error/warning
edisonn@google.com571c70b2013-07-10 17:09:50 +0000547 return NULL;
548 }
549
edisonn@google.comf68aed32013-08-22 15:37:21 +0000550 if (fObjects[id].fIsReferenceResolved) {
edisonn@google.com276fed92013-08-01 21:20:47 +0000551
552#ifdef PDF_TRACE
553 printf("\nresolve(%s) = %s\n", ref->toString(0).c_str(), fObjects[id].fResolvedReference->toString(0, ref->toString().size() + 13).c_str());
554#endif
555
edisonn@google.comf68aed32013-08-22 15:37:21 +0000556 // TODO(edisonn): for known good documents, assert here THAT THE REFERENCE IS NOT null
edisonn@google.com571c70b2013-07-10 17:09:50 +0000557 return fObjects[id].fResolvedReference;
558 }
559
edisonn@google.comf68aed32013-08-22 15:37:21 +0000560 // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow here unless we check for resolved reference on next line
561 // determine if the pdf is corrupted, or we have a bug here
562
563 // avoids recursive calls
564 fObjects[id].fIsReferenceResolved = true;
565
edisonn@google.com571c70b2013-07-10 17:09:50 +0000566 if (fObjects[id].fObj == NULL) {
567 fObjects[id].fObj = readObject(id);
568 }
569
570 if (fObjects[id].fResolvedReference == NULL) {
571 if (!fObjects[id].fObj->isReference()) {
572 fObjects[id].fResolvedReference = fObjects[id].fObj;
573 } else {
574 fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
575 }
576 }
577
edisonn@google.com276fed92013-08-01 21:20:47 +0000578#ifdef PDF_TRACE
579 printf("\nresolve(%s) = %s\n", ref->toString(0).c_str(), fObjects[id].fResolvedReference->toString(0, ref->toString().size() + 13).c_str());
580#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000581 return fObjects[id].fResolvedReference;
582 }
edisonn@google.com276fed92013-08-01 21:20:47 +0000583
edisonn@google.com571c70b2013-07-10 17:09:50 +0000584 // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere
edisonn@google.com3aa35552013-08-14 18:26:20 +0000585 return (SkPdfNativeObject*)ref;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000586}
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000587
edisonn@google.com3aa35552013-08-14 18:26:20 +0000588size_t SkPdfNativeDoc::bytesUsed() const {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000589 return fAllocator->bytesUsed() +
590 fContentLength +
591 fObjects.count() * sizeof(PublicObjectEntry) +
592 fPages.count() * sizeof(SkPdfPageObjectDictionary*) +
593 sizeof(*this);
594}