blob: b92c872a41d32eb6721c4e1421a18bad404ec2fd [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkPdfNativeDoc_DEFINED
9#define SkPdfNativeDoc_DEFINED
edisonn@google.com3aac1f92013-07-02 22:42:53 +000010
edisonn@google.com571c70b2013-07-10 17:09:50 +000011#include "SkRect.h"
12#include "SkTDArray.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000013
edisonn@google.com571c70b2013-07-10 17:09:50 +000014class SkCanvas;
15
16class SkPdfAllocator;
17class SkPdfMapper;
edisonn@google.com3aa35552013-08-14 18:26:20 +000018class SkPdfNativeObject;
edisonn@google.com571c70b2013-07-10 17:09:50 +000019class SkPdfReal;
20class SkPdfInteger;
21class SkPdfString;
22class SkPdfResourceDictionary;
23class SkPdfCatalogDictionary;
24class SkPdfPageObjectDictionary;
25class SkPdfPageTreeNodeDictionary;
26
edisonn@google.com571c70b2013-07-10 17:09:50 +000027class SkPdfNativeTokenizer;
28
edisonn@google.com147adb12013-07-24 15:56:19 +000029class SkStream;
30
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000031// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading
32// the bytes in order. For example, we can try to read the stream optimistically, but if there
33// are issues in the pdf, we must read the pdf from the beginning, and fix whatever errors we can.
34// This would be useful to show quickly page 100 in a pdf (www.example.com/foo.pdf#page100)
35// But if the pdf is missing the xref, then we will have to read most of pdf to be able to render
36// page 100.
37
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000038/** \class SkPdfNativeDoc
39 *
40 * The SkPdfNativeDoc class is used to load a PDF in memory and it represents a PDF Document.
41 *
42 */
edisonn@google.com3aa35552013-08-14 18:26:20 +000043class SkPdfNativeDoc {
edisonn@google.com571c70b2013-07-10 17:09:50 +000044private:
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000045 // Information about public objects in pdf that can be referenced with ID GEN R
edisonn@google.com571c70b2013-07-10 17:09:50 +000046 struct PublicObjectEntry {
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000047 // Offset in the file where the object starts.
edisonn@google.com571c70b2013-07-10 17:09:50 +000048 long fOffset;
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000049
50 // Offset in file where the object ends. Could be used to quickly fail if there is a
51 // problem in pdf structure.
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000052 // long endOffset; // TODO(edisonn): determine the end of the object,
53 // to be used when the doc is corrupted, for fast failure.
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000054
55 // Refered object.
edisonn@google.com3aa35552013-08-14 18:26:20 +000056 SkPdfNativeObject* fObj;
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000057
58 // If refered object is a reference, we resolve recursively the reference until we find
59 // the real object.
edisonn@google.com3aa35552013-08-14 18:26:20 +000060 SkPdfNativeObject* fResolvedReference;
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000061
62 // Used to break a recursive reference to itself.
edisonn@google.comf68aed32013-08-22 15:37:21 +000063 bool fIsReferenceResolved;
edisonn@google.com571c70b2013-07-10 17:09:50 +000064 };
65
edisonn@google.com3aac1f92013-07-02 22:42:53 +000066public:
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000067 // TODO(edisonn) should be deprecated
edisonn@google.com3aa35552013-08-14 18:26:20 +000068 SkPdfNativeDoc(const char* path);
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000069
70 // TODO(edisonn) should be deprecated
scroggo@google.com90922892013-11-14 19:09:27 +000071 // FIXME: Untested.
edisonn@google.com3aa35552013-08-14 18:26:20 +000072 SkPdfNativeDoc(SkStream* stream);
edisonn@google.com147adb12013-07-24 15:56:19 +000073
edisonn@google.com3aa35552013-08-14 18:26:20 +000074 ~SkPdfNativeDoc();
edisonn@google.com571c70b2013-07-10 17:09:50 +000075
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000076 // returns the number of pages in the pdf
edisonn@google.com571c70b2013-07-10 17:09:50 +000077 int pages() const;
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000078
79 // returns the page resources
edisonn@google.com571c70b2013-07-10 17:09:50 +000080 SkPdfResourceDictionary* pageResources(int page);
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000081
82 // returns the page's mediabox i points - the page physical boundaries.
edisonn@google.com951d6532013-07-10 23:17:31 +000083 SkRect MediaBox(int page);
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000084
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +000085 //returns objects that are references and can be queried.
86 size_t objects() const;
87
88 // returns an object.
89 // TODO(edisonn): pdf updates are not supported yet.
90 // add generation parameter to support page updates.
91 SkPdfNativeObject* object(int id /*, int generation*/ );
92
93 // returns the object that holds all the page informnation
94 // TODO(edisonn): pdf updates are not supported yet.
95 // add generation parameter to support page updates.
96 SkPdfPageObjectDictionary* page(int page/*, int generation*/);
97
98 // TODO(edisonn): deprecate the mapper - was used when we supported multiple
99 // parsers (podofo)
100 // The mapper maps allows an object to be mapped to a different dictionary type
101 // and it could verify the integrity of the object.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000102 const SkPdfMapper* mapper() const;
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +0000103
104 // Allocator of the pdf - this holds all objects that are publicly referenced
105 // and all the objects that they refer
edisonn@google.com571c70b2013-07-10 17:09:50 +0000106 SkPdfAllocator* allocator() const;
107
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +0000108 // Allows a renderer to create values to be dumped on the stack for operators to process them.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000109 SkPdfReal* createReal(double value) const;
110 SkPdfInteger* createInteger(int value) const;
111 // the string does not own the char*
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000112 SkPdfString* createString(const unsigned char* sz, size_t len) const;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000113
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +0000114 // Resolve a reference object. Will recursively resolve the reference
115 // until a real object is found
edisonn@google.com3aa35552013-08-14 18:26:20 +0000116 SkPdfNativeObject* resolveReference(SkPdfNativeObject* ref);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000117
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000118 // Reports an approximation of all the memory usage.
edisonn@google.com7b328fd2013-07-11 12:53:06 +0000119 size_t bytesUsed() const;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000120
edisonn@google.com571c70b2013-07-10 17:09:50 +0000121private:
122
edisonn@google.com147adb12013-07-24 15:56:19 +0000123 // Takes ownership of bytes.
124 void init(const void* bytes, size_t length);
edisonn@google.com2af2ad9c2013-10-11 16:17:44 +0000125
126 // loads a pdf that has missing xref
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000127 void loadWithoutXRef();
edisonn@google.com147adb12013-07-24 15:56:19 +0000128
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000129 const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart,
130 const unsigned char* trailerEnd);
131 const unsigned char* readTrailer(const unsigned char* trailerStart,
132 const unsigned char* trailerEnd,
133 bool storeCatalog, long* prev, bool skipKeyword);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000135 // TODO(edisonn): pdfs with updates not supported right now, generation ignored.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000136 void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
137 static void reset(PublicObjectEntry* obj) {
138 obj->fObj = NULL;
139 obj->fResolvedReference = NULL;
140 obj->fOffset = -1;
edisonn@google.comf68aed32013-08-22 15:37:21 +0000141 obj->fIsReferenceResolved = false;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000142 }
143
edisonn@google.com3aa35552013-08-14 18:26:20 +0000144 SkPdfNativeObject* readObject(int id/*, int generation*/);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000145
146 void fillPages(SkPdfPageTreeNodeDictionary* tree);
147
edisonn@google.com571c70b2013-07-10 17:09:50 +0000148 SkPdfAllocator* fAllocator;
149 SkPdfMapper* fMapper;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000150 const unsigned char* fFileContent;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000151 size_t fContentLength;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000152 SkPdfNativeObject* fRootCatalogRef;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000153 SkPdfCatalogDictionary* fRootCatalog;
154
155 mutable SkTDArray<PublicObjectEntry> fObjects;
156 SkTDArray<SkPdfPageObjectDictionary*> fPages;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000157};
158
edisonn@google.comcf2cfa12013-08-21 16:31:37 +0000159#endif // SkPdfNativeDoc_DEFINED