blob: f6b323abc2a9ce83df96fbdf520a944012c6622d [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
edisonn@google.com3aa35552013-08-14 18:26:20 +00008#include "SkPdfNativeDoc.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +00009#include "SkPdfNativeTokenizer.h"
edisonn@google.com3aa35552013-08-14 18:26:20 +000010#include "SkPdfNativeObject.h"
edisonn@google.comaf54a512013-09-13 19:33:42 +000011#include "SkPdfReporter.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000012
edisonn@google.com571c70b2013-07-10 17:09:50 +000013#include <stdio.h>
14#include <string.h>
15#include <sys/types.h>
16#include <sys/stat.h>
edisonn@google.com3aac1f92013-07-02 22:42:53 +000017
edisonn@google.com33f11b62013-08-14 21:35:27 +000018// TODO(edisonn): for some reason on mac these files are found here, but are found from headers
19//#include "SkPdfFileTrailerDictionary_autogen.h"
20//#include "SkPdfCatalogDictionary_autogen.h"
21//#include "SkPdfPageObjectDictionary_autogen.h"
22//#include "SkPdfPageTreeNodeDictionary_autogen.h"
23#include "SkPdfHeaders_autogen.h"
24
edisonn@google.com571c70b2013-07-10 17:09:50 +000025#include "SkPdfMapper_autogen.h"
26
edisonn@google.com147adb12013-07-24 15:56:19 +000027#include "SkStream.h"
edisonn@google.com571c70b2013-07-10 17:09:50 +000028
29
// Returns the size in bytes that stat() reports for |filename|, or -1 when
// stat() fails (for example because the file does not exist).
static long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return (long)info.st_size;
}
36
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000037static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000038 while (current > start && !isPdfEOL(*(current - 1))) {
39 current--;
40 }
41 return current;
42}
43
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000044static const unsigned char* previousLineHome(const unsigned char* start, const unsigned char* current) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000045 if (current > start && isPdfEOL(*(current - 1))) {
46 current--;
47 }
48
49 // allows CR+LF, LF+CR but not two CR+CR or LF+LF
50 if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) {
51 current--;
52 }
53
54 while (current > start && !isPdfEOL(*(current - 1))) {
55 current--;
56 }
57
58 return current;
59}
60
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000061static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
edisonn@google.com571c70b2013-07-10 17:09:50 +000062 while (current < end && !isPdfEOL(*current)) {
63 current++;
64 }
65 current++;
66 if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
67 current++;
68 }
69 return current;
70}
71
// Global pointer to a document. In this file it is only assigned by the
// SkPdfNativeDoc(const char* path) constructor, which stores |this| in it.
// NOTE(review): the SkStream constructor does not update it and nothing here
// resets it on destruction - confirm how readers of gDoc expect it to behave.
SkPdfNativeDoc* gDoc = NULL;
edisonn@google.com571c70b2013-07-10 17:09:50 +000073
// TODO(edisonn): NYI
// TODO(edisonn): 3 constructors: from URL, from stream, from file ...
// TODO(edisonn): write one that accepts errors in the file and ignores/fixes them
// TODO(edisonn): testing:
//   1) run on a lot of files
//   2) recoverable corrupt file: remove endobj, endstream, remove other keywords, use other white spaces, insert comments randomly, ...
//   3) irrecoverable corrupt file
edisonn@google.com147adb12013-07-24 15:56:19 +000081
edisonn@google.com3aa35552013-08-14 18:26:20 +000082SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
edisonn@google.com147adb12013-07-24 15:56:19 +000083 : fAllocator(new SkPdfAllocator())
84 , fFileContent(NULL)
85 , fContentLength(0)
86 , fRootCatalogRef(NULL)
87 , fRootCatalog(NULL) {
88 size_t size = stream->getLength();
89 void* ptr = sk_malloc_throw(size);
90 stream->read(ptr, size);
91
92 init(ptr, size);
93}
94
edisonn@google.com3aa35552013-08-14 18:26:20 +000095SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
edisonn@google.com432640a2013-07-10 22:53:40 +000096 : fAllocator(new SkPdfAllocator())
edisonn@google.com147adb12013-07-24 15:56:19 +000097 , fFileContent(NULL)
98 , fContentLength(0)
edisonn@google.com432640a2013-07-10 22:53:40 +000099 , fRootCatalogRef(NULL)
100 , fRootCatalog(NULL) {
edisonn@google.com222382b2013-07-10 22:33:10 +0000101 gDoc = this;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000102 FILE* file = fopen(path, "r");
edisonn@google.come57c62d2013-08-07 18:04:15 +0000103 // TODO(edisonn): put this in a function that can return NULL
104 if (file) {
105 size_t size = getFileSize(path);
106 void* content = sk_malloc_throw(size);
107 bool ok = (0 != fread(content, size, 1, file));
108 fclose(file);
109 if (!ok) {
110 sk_free(content);
edisonn@google.comaf54a512013-09-13 19:33:42 +0000111 SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "could not read file", NULL, NULL);
edisonn@google.come57c62d2013-08-07 18:04:15 +0000112 // TODO(edisonn): not nice to return like this from constructor, create a static
113 // function that can report NULL for failures.
114 return; // Doc will have 0 pages
115 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000116
edisonn@google.come57c62d2013-08-07 18:04:15 +0000117 init(content, size);
edisonn@google.com620edc52013-07-18 13:03:03 +0000118 }
edisonn@google.com147adb12013-07-24 15:56:19 +0000119}
120
edisonn@google.com3aa35552013-08-14 18:26:20 +0000121void SkPdfNativeDoc::init(const void* bytes, size_t length) {
edisonn@google.com147adb12013-07-24 15:56:19 +0000122 fFileContent = (const unsigned char*)bytes;
123 fContentLength = length;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000124 const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
125 const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
126 const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000127
128 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000129 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find startxref", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000130 }
131
132 long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
133
134 bool storeCatalog = true;
135 while (xrefByteOffset >= 0) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000136 const unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
edisonn@google.com24cdf132013-07-30 16:06:12 +0000137 xrefByteOffset = -1;
138 if (trailerStart < xrefstartKeywordLine) {
139 readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
140 storeCatalog = false;
141 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000142 }
143
144 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
145 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
146 // load catalog
edisonn@google.com571c70b2013-07-10 17:09:50 +0000147
edisonn@google.com432640a2013-07-10 22:53:40 +0000148 if (fRootCatalogRef) {
149 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
edisonn@google.com8bad7372013-07-10 23:36:56 +0000150 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) {
151 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
152 if (tree && tree->isDictionary() && tree->valid()) {
153 fillPages(tree);
154 }
155 }
edisonn@google.com432640a2013-07-10 22:53:40 +0000156 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000157
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000158 // TODO(edisonn): clean up this doc, or better, let the caller call again and build a new doc
159 // caller should be a static function.
160 if (pages() == 0) {
161 loadWithoutXRef();
162 }
163
edisonn@google.com8bad7372013-07-10 23:36:56 +0000164 // TODO(edisonn): corrupted pdf, read it from beginning and rebuild (xref, trailer, or just reall all objects)
165 // 0 pages
166
edisonn@google.com571c70b2013-07-10 17:09:50 +0000167 // now actually read all objects if we want, or do it lazyly
168 // and resolve references?... or not ...
169}
170
edisonn@google.com3aa35552013-08-14 18:26:20 +0000171void SkPdfNativeDoc::loadWithoutXRef() {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000172 const unsigned char* current = fFileContent;
173 const unsigned char* end = fFileContent + fContentLength;
174
175 // TODO(edisonn): read pdf version
176 current = ignoreLine(current, end);
177
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000178 current = skipPdfWhiteSpaces(current, end);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000179 while (current < end) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000180 SkPdfNativeObject token;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000181 current = nextObject(current, end, &token, NULL, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000182 if (token.isInteger()) {
183 int id = (int)token.intValue();
184
185 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000186 current = nextObject(current, end, &token, NULL, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000187 // int generation = (int)token.intValue(); // TODO(edisonn): ignored for now
188
189 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000190 current = nextObject(current, end, &token, NULL, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000191 // TODO(edisonn): must be obj, return error if not? ignore ?
192 if (!token.isKeyword("obj")) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000193 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find obj", NULL, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000194 continue;
195 }
196
197 while (fObjects.count() < id + 1) {
198 reset(fObjects.append());
199 }
200
201 fObjects[id].fOffset = current - fFileContent;
202
edisonn@google.com3aa35552013-08-14 18:26:20 +0000203 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000204 current = nextObject(current, end, obj, fAllocator, this);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000205
206 fObjects[id].fResolvedReference = obj;
207 fObjects[id].fObj = obj;
edisonn@google.comaf54a512013-09-13 19:33:42 +0000208 fObjects[id].fIsReferenceResolved = true;
209
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000210
211 // set objects
212 } else if (token.isKeyword("trailer")) {
213 long dummy;
214 current = readTrailer(current, end, true, &dummy, true);
215 } else if (token.isKeyword("startxref")) {
216 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000217 current = nextObject(current, end, &token, NULL, NULL); // ignore
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000218 }
219
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000220 current = skipPdfWhiteSpaces(current, end);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000221 }
222
edisonn@google.com4f898b72013-08-07 21:11:57 +0000223 // TODO(edisonn): hack, detect root catalog - we need to implement liniarized support, and remove this hack.
224 if (!fRootCatalogRef) {
225 for (unsigned int i = 0 ; i < objects(); i++) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000226 SkPdfNativeObject* obj = object(i);
227 SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
edisonn@google.com4f898b72013-08-07 21:11:57 +0000228 if (root && root->isReference()) {
229 fRootCatalogRef = root;
230 }
231 }
232 }
233
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000234 if (fRootCatalogRef) {
235 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
236 if (fRootCatalog->isDictionary() && fRootCatalog->valid()) {
237 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
238 if (tree && tree->isDictionary() && tree->valid()) {
239 fillPages(tree);
240 }
241 }
242 }
243
edisonn@google.com4f898b72013-08-07 21:11:57 +0000244
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000245}
246
edisonn@google.com571c70b2013-07-10 17:09:50 +0000247// TODO(edisonn): NYI
edisonn@google.com3aa35552013-08-14 18:26:20 +0000248SkPdfNativeDoc::~SkPdfNativeDoc() {
edisonn@google.com147adb12013-07-24 15:56:19 +0000249 sk_free((void*)fFileContent);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000250 delete fAllocator;
251}
252
edisonn@google.com3aa35552013-08-14 18:26:20 +0000253const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, const unsigned char* trailerEnd) {
254 SkPdfNativeObject xref;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000255 const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL);
edisonn@google.com2273f9b2013-08-06 21:48:44 +0000256
257 if (!xref.isKeyword("xref")) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000258 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref", NULL, NULL);
edisonn@google.com2273f9b2013-08-06 21:48:44 +0000259 return trailerEnd;
260 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000261
edisonn@google.com3aa35552013-08-14 18:26:20 +0000262 SkPdfNativeObject token;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000263 while (current < trailerEnd) {
264 token.reset();
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000265 const unsigned char* previous = current;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000266 current = nextObject(current, trailerEnd, &token, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000267 if (!token.isInteger()) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000268 SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Done readCrossReferenceSection", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000269 return previous;
270 }
271
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000272 int startId = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000273 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000274 current = nextObject(current, trailerEnd, &token, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000275
276 if (!token.isInteger()) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000277 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000278 return current;
279 }
280
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000281 int entries = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000282
283 for (int i = 0; i < entries; i++) {
284 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000285 current = nextObject(current, trailerEnd, &token, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000286 if (!token.isInteger()) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000287 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000288 return current;
289 }
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000290 int offset = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000291
292 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000293 current = nextObject(current, trailerEnd, &token, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000294 if (!token.isInteger()) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000295 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000296 return current;
297 }
edisonn@google.coma3356fc2013-07-10 18:20:06 +0000298 int generation = (int)token.intValue();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000299
300 token.reset();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000301 current = nextObject(current, trailerEnd, &token, NULL, NULL);
edisonn@google.come878e722013-07-29 19:10:58 +0000302 if (!token.isKeyword() || token.lenstr() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000303 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection: f or n expected", &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000304 return current;
305 }
306
307 addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
308 }
309 }
edisonn@google.comaf54a512013-09-13 19:33:42 +0000310 SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, "Unexpected end of readCrossReferenceSection", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000311 return current;
312}
313
edisonn@google.com3aa35552013-08-14 18:26:20 +0000314const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, const unsigned char* trailerEnd, bool storeCatalog, long* prev, bool skipKeyword) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000315 *prev = -1;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000316
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000317 const unsigned char* current = trailerStart;
318 if (!skipKeyword) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000319 SkPdfNativeObject trailerKeyword;
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000320 // TODO(edisonn): use null allocator, and let it just fail if memory
321 // needs allocated (but no crash)!
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000322 current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000323
324 if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
325 strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
326 // TODO(edisonn): report warning, rebuild trailer from objects.
edisonn@google.comaf54a512013-09-13 19:33:42 +0000327 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readTrailer: trailer keyword expected", &trailerKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000328 return current;
329 }
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000330 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000331
edisonn@google.com3aa35552013-08-14 18:26:20 +0000332 SkPdfNativeObject token;
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000333 current = nextObject(current, trailerEnd, &token, fAllocator, NULL);
edisonn@google.com432640a2013-07-10 22:53:40 +0000334 if (!token.isDictionary()) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000335 return current;
edisonn@google.com432640a2013-07-10 22:53:40 +0000336 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000337 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
edisonn@google.com432640a2013-07-10 22:53:40 +0000338 if (!trailer->valid()) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000339 return current;
edisonn@google.com432640a2013-07-10 22:53:40 +0000340 }
edisonn@google.com571c70b2013-07-10 17:09:50 +0000341
342 if (storeCatalog) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000343 SkPdfNativeObject* ref = trailer->Root(NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000344 if (ref == NULL || !ref->isReference()) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000345 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readTrailer: unexpected root reference", ref, SkPdfNativeObject::kReference_PdfObjectType, NULL);
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000346 return current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000347 }
348 fRootCatalogRef = ref;
349 }
350
351 if (trailer->has_Prev()) {
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000352 *prev = (long)trailer->Prev(NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000353 }
354
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000355 return current;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000356}
357
edisonn@google.com3aa35552013-08-14 18:26:20 +0000358void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000359 // TODO(edisonn): security here
360 while (fObjects.count() < id + 1) {
361 reset(fObjects.append());
362 }
363
364 fObjects[id].fOffset = offset;
365 fObjects[id].fObj = NULL;
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000366 fObjects[id].fResolvedReference = NULL;
edisonn@google.comf68aed32013-08-22 15:37:21 +0000367 fObjects[id].fIsReferenceResolved = false;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000368}
369
edisonn@google.com3aa35552013-08-14 18:26:20 +0000370SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000371 long startOffset = fObjects[id].fOffset;
372 //long endOffset = fObjects[id].fOffsetEnd;
373 // TODO(edisonn): use hinted endOffset
374 // TODO(edisonn): current implementation will result in a lot of memory usage
375 // to decrease memory usage, we wither need to be smart and know where objects end, and we will
376 // alocate only the chancks needed, or the tokenizer will not make copies, but then it needs to
377 // cache the results so it does not go twice on the same buffer
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000378 const unsigned char* current = fFileContent + startOffset;
379 const unsigned char* end = fFileContent + fContentLength;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000380
edisonn@google.com33f11b62013-08-14 21:35:27 +0000381 SkPdfNativeTokenizer tokenizer(current, end - current, fAllocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000382
edisonn@google.com3aa35552013-08-14 18:26:20 +0000383 SkPdfNativeObject idObj;
384 SkPdfNativeObject generationObj;
385 SkPdfNativeObject objKeyword;
386 SkPdfNativeObject* dict = fAllocator->allocObject();
edisonn@google.com571c70b2013-07-10 17:09:50 +0000387
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000388 current = nextObject(current, end, &idObj, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000389 if (current >= end) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000390 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000391 return NULL;
392 }
393
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000394 current = nextObject(current, end, &generationObj, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000395 if (current >= end) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000396 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading generation", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000397 return NULL;
398 }
399
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000400 current = nextObject(current, end, &objKeyword, NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000401 if (current >= end) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000402 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading keyword obj", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000403 return NULL;
404 }
405
edisonn@google.comaf54a512013-09-13 19:33:42 +0000406 if (!idObj.isInteger() || id != idObj.intValue()) {
407 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id", &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
408 }
409
410 // TODO(edisonn): verify that the generation is the right one
411 if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) {
412 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected generation", &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000413 }
414
415 if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000416 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected obj keyword", &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000417 }
418
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000419 current = nextObject(current, end, dict, fAllocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000420
421 // TODO(edisonn): report warning/error - verify last token is endobj
422
423 return dict;
424}
425
edisonn@google.com3aa35552013-08-14 18:26:20 +0000426void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
edisonn@google.comb0145ce2013-08-05 16:23:23 +0000427 SkPdfArray* kids = tree->Kids(this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000428 if (kids == NULL) {
429 *fPages.append() = (SkPdfPageObjectDictionary*)tree;
430 return;
431 }
432
433 int cnt = kids->size();
434 for (int i = 0; i < cnt; i++) {
edisonn@google.com3aa35552013-08-14 18:26:20 +0000435 SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
436 if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000437 *fPages.append() = (SkPdfPageObjectDictionary*)obj;
438 } else {
439 // TODO(edisonn): verify that it is a page tree indeed
440 fillPages((SkPdfPageTreeNodeDictionary*)obj);
441 }
442 }
443}
444
edisonn@google.com3aa35552013-08-14 18:26:20 +0000445int SkPdfNativeDoc::pages() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000446 return fPages.count();
447}
448
edisonn@google.com3aa35552013-08-14 18:26:20 +0000449SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
edisonn@google.com88fc03d2013-07-30 13:34:10 +0000450 SkASSERT(page >= 0 && page < fPages.count());
451 return fPages[page];
452}
453
454
edisonn@google.com3aa35552013-08-14 18:26:20 +0000455SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
edisonn@google.com88fc03d2013-07-30 13:34:10 +0000456 SkASSERT(page >= 0 && page < fPages.count());
edisonn@google.com571c70b2013-07-10 17:09:50 +0000457 return fPages[page]->Resources(this);
458}
459
460// TODO(edisonn): Partial implemented. Move the logics directly in the code generator for inheritable and default value?
edisonn@google.com3aa35552013-08-14 18:26:20 +0000461SkRect SkPdfNativeDoc::MediaBox(int page) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000462 SkPdfPageObjectDictionary* current = fPages[page];
463 while (!current->has_MediaBox() && current->has_Parent()) {
464 current = (SkPdfPageObjectDictionary*)current->Parent(this);
465 }
466 if (current) {
467 return current->MediaBox(this);
468 }
469 return SkRect::MakeEmpty();
470}
471
472// TODO(edisonn): stream or array ... ? for now only array
edisonn@google.com3aa35552013-08-14 18:26:20 +0000473SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfPage(int page,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000474 SkPdfAllocator* allocator) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000475 if (fPages[page]->isContentsAStream(this)) {
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000476 return tokenizerOfStream(fPages[page]->getContentsAsStream(this), allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000477 } else {
478 // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart
479 // so we don't allocate new memory
480 return NULL;
481 }
482}
483
edisonn@google.com3aa35552013-08-14 18:26:20 +0000484SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfStream(SkPdfNativeObject* stream,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000485 SkPdfAllocator* allocator) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000486 if (stream == NULL) {
487 return NULL;
488 }
489
edisonn@google.com33f11b62013-08-14 21:35:27 +0000490 return new SkPdfNativeTokenizer(stream, allocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000491}
492
493// TODO(edisonn): NYI
edisonn@google.com3aa35552013-08-14 18:26:20 +0000494SkPdfNativeTokenizer* SkPdfNativeDoc::tokenizerOfBuffer(const unsigned char* buffer, size_t len,
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000495 SkPdfAllocator* allocator) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000496 // warning does not track two calls in the same buffer! the buffer is updated!
497 // make a clean copy if needed!
edisonn@google.com33f11b62013-08-14 21:35:27 +0000498 return new SkPdfNativeTokenizer(buffer, len, allocator, this);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000499}
500
edisonn@google.com3aa35552013-08-14 18:26:20 +0000501size_t SkPdfNativeDoc::objects() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000502 return fObjects.count();
503}
504
edisonn@google.com3aa35552013-08-14 18:26:20 +0000505SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000506 SkASSERT(!(i < 0 || i > fObjects.count()));
507
508 if (i < 0 || i > fObjects.count()) {
509 return NULL;
510 }
511
512 if (fObjects[i].fObj == NULL) {
513 // TODO(edisonn): when we read the cross reference sections, store the start of the next object
514 // and fill fOffsetEnd
515 fObjects[i].fObj = readObject(i);
516 }
517
518 return fObjects[i].fObj;
519}
520
edisonn@google.com3aa35552013-08-14 18:26:20 +0000521const SkPdfMapper* SkPdfNativeDoc::mapper() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000522 return fMapper;
523}
524
edisonn@google.com3aa35552013-08-14 18:26:20 +0000525SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
526 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000527 SkPdfNativeObject::makeReal(value, obj);
528 // TODO(edisonn): TRACK_FROM_CODE(obj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000529 return (SkPdfReal*)obj;
530}
531
edisonn@google.com3aa35552013-08-14 18:26:20 +0000532SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
533 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000534 SkPdfNativeObject::makeInteger(value, obj);
535 // TODO(edisonn): TRACK_FROM_CODE(obj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000536 return (SkPdfInteger*)obj;
537}
538
edisonn@google.com3aa35552013-08-14 18:26:20 +0000539SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
540 SkPdfNativeObject* obj = fAllocator->allocObject();
edisonn@google.com598cf5d2013-10-09 15:13:19 +0000541 SkPdfNativeObject::makeString(sz, len, obj);
542 // TODO(edisonn): TRACK_FROM_CODE(obj);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000543 return (SkPdfString*)obj;
544}
545
edisonn@google.com3aa35552013-08-14 18:26:20 +0000546SkPdfAllocator* SkPdfNativeDoc::allocator() const {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000547 return fAllocator;
548}
549
edisonn@google.com571c70b2013-07-10 17:09:50 +0000550// TODO(edisonn): fix infinite loop if ref to itself!
551// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference?
edisonn@google.com3aa35552013-08-14 18:26:20 +0000552SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
edisonn@google.com571c70b2013-07-10 17:09:50 +0000553 if (ref && ref->isReference()) {
554 int id = ref->referenceId();
555 // TODO(edisonn): generation/updates not supported now
556 //int gen = ref->referenceGeneration();
557
edisonn@google.com641cce92013-07-30 12:09:14 +0000558 // TODO(edisonn): verify id and gen expected
559 if (id < 0 || id >= fObjects.count()) {
edisonn@google.comaf54a512013-09-13 19:33:42 +0000560 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "resolve reference id out of bounds", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000561 return NULL;
562 }
563
edisonn@google.comf68aed32013-08-22 15:37:21 +0000564 if (fObjects[id].fIsReferenceResolved) {
edisonn@google.com276fed92013-08-01 21:20:47 +0000565
566#ifdef PDF_TRACE
567 printf("\nresolve(%s) = %s\n", ref->toString(0).c_str(), fObjects[id].fResolvedReference->toString(0, ref->toString().size() + 13).c_str());
568#endif
569
edisonn@google.comaf54a512013-09-13 19:33:42 +0000570 SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity, kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000571 return fObjects[id].fResolvedReference;
572 }
573
edisonn@google.comf68aed32013-08-22 15:37:21 +0000574 // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow here unless we check for resolved reference on next line
575 // determine if the pdf is corrupted, or we have a bug here
576
577 // avoids recursive calls
578 fObjects[id].fIsReferenceResolved = true;
579
edisonn@google.com571c70b2013-07-10 17:09:50 +0000580 if (fObjects[id].fObj == NULL) {
581 fObjects[id].fObj = readObject(id);
582 }
583
584 if (fObjects[id].fResolvedReference == NULL) {
585 if (!fObjects[id].fObj->isReference()) {
586 fObjects[id].fResolvedReference = fObjects[id].fObj;
587 } else {
588 fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
589 }
590 }
591
edisonn@google.com276fed92013-08-01 21:20:47 +0000592#ifdef PDF_TRACE
593 printf("\nresolve(%s) = %s\n", ref->toString(0).c_str(), fObjects[id].fResolvedReference->toString(0, ref->toString().size() + 13).c_str());
594#endif
edisonn@google.com571c70b2013-07-10 17:09:50 +0000595 return fObjects[id].fResolvedReference;
596 }
edisonn@google.com276fed92013-08-01 21:20:47 +0000597
edisonn@google.com571c70b2013-07-10 17:09:50 +0000598 // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere
edisonn@google.com3aa35552013-08-14 18:26:20 +0000599 return (SkPdfNativeObject*)ref;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000600}
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000601
edisonn@google.com3aa35552013-08-14 18:26:20 +0000602size_t SkPdfNativeDoc::bytesUsed() const {
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000603 return fAllocator->bytesUsed() +
604 fContentLength +
605 fObjects.count() * sizeof(PublicObjectEntry) +
606 fPages.count() * sizeof(SkPdfPageObjectDictionary*) +
607 sizeof(*this);
608}