blob: d8241376c179ec4169c0650ac6bcc73cd9183457 [file] [log] [blame]
edisonn@google.comcf2cfa12013-08-21 16:31:37 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkPdfNativeDoc_DEFINED
9#define SkPdfNativeDoc_DEFINED
edisonn@google.com3aac1f92013-07-02 22:42:53 +000010
edisonn@google.com571c70b2013-07-10 17:09:50 +000011#include "SkRect.h"
12#include "SkTDArray.h"
edisonn@google.com3aac1f92013-07-02 22:42:53 +000013
edisonn@google.com571c70b2013-07-10 17:09:50 +000014class SkCanvas;
15
16class SkPdfAllocator;
17class SkPdfMapper;
edisonn@google.com3aa35552013-08-14 18:26:20 +000018class SkPdfNativeObject;
edisonn@google.com571c70b2013-07-10 17:09:50 +000019class SkPdfReal;
20class SkPdfInteger;
21class SkPdfString;
22class SkPdfResourceDictionary;
23class SkPdfCatalogDictionary;
24class SkPdfPageObjectDictionary;
25class SkPdfPageTreeNodeDictionary;
26
edisonn@google.com571c70b2013-07-10 17:09:50 +000027class SkPdfNativeTokenizer;
28
edisonn@google.com147adb12013-07-24 15:56:19 +000029class SkStream;
30
// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading
// the bytes in order. For example, we can try to read the stream optimistically, but if there
// are issues in the pdf, we must read the pdf from the beginning and fix whatever errors we can.
// This would be useful to quickly show page 100 of a pdf (www.example.com/foo.pdf#page100).
// But if the pdf is missing the xref, then we will have to read most of the pdf to be able to
// render page 100.
37
edisonn@google.com2af2ad92013-10-11 16:17:44 +000038/** \class SkPdfNativeDoc
39 *
40 * The SkPdfNativeDoc class is used to load a PDF in memory and it represents a PDF Document.
41 *
42 */
edisonn@google.com3aa35552013-08-14 18:26:20 +000043class SkPdfNativeDoc {
edisonn@google.com571c70b2013-07-10 17:09:50 +000044private:
edisonn@google.com2af2ad92013-10-11 16:17:44 +000045 // Information about public objects in pdf that can be referenced with ID GEN R
edisonn@google.com571c70b2013-07-10 17:09:50 +000046 struct PublicObjectEntry {
edisonn@google.com2af2ad92013-10-11 16:17:44 +000047 // Offset in the file where the object starts.
edisonn@google.com571c70b2013-07-10 17:09:50 +000048 long fOffset;
edisonn@google.com2af2ad92013-10-11 16:17:44 +000049
50 // Offset in file where the object ends. Could be used to quickly fail if there is a
51 // problem in pdf structure.
edisonn@google.comc8fda9d2013-10-09 20:23:12 +000052 // long endOffset; // TODO(edisonn): determine the end of the object,
53 // to be used when the doc is corrupted, for fast failure.
edisonn@google.com2af2ad92013-10-11 16:17:44 +000054
55 // Refered object.
edisonn@google.com3aa35552013-08-14 18:26:20 +000056 SkPdfNativeObject* fObj;
edisonn@google.com2af2ad92013-10-11 16:17:44 +000057
58 // If refered object is a reference, we resolve recursively the reference until we find
59 // the real object.
edisonn@google.com3aa35552013-08-14 18:26:20 +000060 SkPdfNativeObject* fResolvedReference;
edisonn@google.com2af2ad92013-10-11 16:17:44 +000061
62 // Used to break a recursive reference to itself.
edisonn@google.comf68aed32013-08-22 15:37:21 +000063 bool fIsReferenceResolved;
edisonn@google.com571c70b2013-07-10 17:09:50 +000064 };
65
edisonn@google.com3aac1f92013-07-02 22:42:53 +000066public:
edisonn@google.com2af2ad92013-10-11 16:17:44 +000067 // TODO(edisonn) should be deprecated
edisonn@google.com3aa35552013-08-14 18:26:20 +000068 SkPdfNativeDoc(const char* path);
edisonn@google.com2af2ad92013-10-11 16:17:44 +000069
70 // TODO(edisonn) should be deprecated
edisonn@google.com3aa35552013-08-14 18:26:20 +000071 SkPdfNativeDoc(SkStream* stream);
edisonn@google.com147adb12013-07-24 15:56:19 +000072
edisonn@google.com3aa35552013-08-14 18:26:20 +000073 ~SkPdfNativeDoc();
edisonn@google.com571c70b2013-07-10 17:09:50 +000074
edisonn@google.com2af2ad92013-10-11 16:17:44 +000075 // returns the number of pages in the pdf
edisonn@google.com571c70b2013-07-10 17:09:50 +000076 int pages() const;
edisonn@google.com2af2ad92013-10-11 16:17:44 +000077
78 // returns the page resources
edisonn@google.com571c70b2013-07-10 17:09:50 +000079 SkPdfResourceDictionary* pageResources(int page);
edisonn@google.com2af2ad92013-10-11 16:17:44 +000080
81 // returns the page's mediabox i points - the page physical boundaries.
edisonn@google.com951d6532013-07-10 23:17:31 +000082 SkRect MediaBox(int page);
edisonn@google.com2af2ad92013-10-11 16:17:44 +000083
84 // Returns a tokenizer of a page. The passed allocator will be used to allocate objects that
85 // are parsed. It should be destroyed after the tokenizer.
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000086 SkPdfNativeTokenizer* tokenizerOfPage(int n, SkPdfAllocator* allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +000087
edisonn@google.com2af2ad92013-10-11 16:17:44 +000088 // Returns a tokenizer of a pdf stream. The passed allocator will be used to allocate objects
89 // that are parsed. It should be destroyed after the tokenizer.
edisonn@google.com3aa35552013-08-14 18:26:20 +000090 SkPdfNativeTokenizer* tokenizerOfStream(SkPdfNativeObject* stream, SkPdfAllocator* allocator);
edisonn@google.com2af2ad92013-10-11 16:17:44 +000091
92 // Returns a tokenizer of a memory buffer. The passed allocator will be used to allocate objects
93 // that are parsed. It should be destroyed after the tokenizer.
edisonn@google.com2ccc3af2013-07-23 17:43:18 +000094 SkPdfNativeTokenizer* tokenizerOfBuffer(const unsigned char* buffer, size_t len,
95 SkPdfAllocator* allocator);
edisonn@google.com571c70b2013-07-10 17:09:50 +000096
edisonn@google.com571c70b2013-07-10 17:09:50 +000097
edisonn@google.com2af2ad92013-10-11 16:17:44 +000098 //returns objects that are references and can be queried.
99 size_t objects() const;
100
101 // returns an object.
102 // TODO(edisonn): pdf updates are not supported yet.
103 // add generation parameter to support page updates.
104 SkPdfNativeObject* object(int id /*, int generation*/ );
105
106 // returns the object that holds all the page informnation
107 // TODO(edisonn): pdf updates are not supported yet.
108 // add generation parameter to support page updates.
109 SkPdfPageObjectDictionary* page(int page/*, int generation*/);
110
111 // TODO(edisonn): deprecate the mapper - was used when we supported multiple
112 // parsers (podofo)
113 // The mapper maps allows an object to be mapped to a different dictionary type
114 // and it could verify the integrity of the object.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000115 const SkPdfMapper* mapper() const;
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000116
117 // Allocator of the pdf - this holds all objects that are publicly referenced
118 // and all the objects that they refer
edisonn@google.com571c70b2013-07-10 17:09:50 +0000119 SkPdfAllocator* allocator() const;
120
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000121 // Allows a renderer to create values to be dumped on the stack for operators to process them.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000122 SkPdfReal* createReal(double value) const;
123 SkPdfInteger* createInteger(int value) const;
124 // the string does not own the char*
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000125 SkPdfString* createString(const unsigned char* sz, size_t len) const;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000126
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000127 // Resolve a reference object. Will recursively resolve the reference
128 // until a real object is found
edisonn@google.com3aa35552013-08-14 18:26:20 +0000129 SkPdfNativeObject* resolveReference(SkPdfNativeObject* ref);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000130
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000131 // Reports an approximation of all the memory usage.
edisonn@google.com7b328fd2013-07-11 12:53:06 +0000132 size_t bytesUsed() const;
edisonn@google.coma5aaa792013-07-11 12:27:21 +0000133
edisonn@google.com571c70b2013-07-10 17:09:50 +0000134private:
135
edisonn@google.com147adb12013-07-24 15:56:19 +0000136 // Takes ownership of bytes.
137 void init(const void* bytes, size_t length);
edisonn@google.com2af2ad92013-10-11 16:17:44 +0000138
139 // loads a pdf that has missing xref
edisonn@google.com4ef4bed2013-07-29 22:14:45 +0000140 void loadWithoutXRef();
edisonn@google.com147adb12013-07-24 15:56:19 +0000141
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000142 const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart,
143 const unsigned char* trailerEnd);
144 const unsigned char* readTrailer(const unsigned char* trailerStart,
145 const unsigned char* trailerEnd,
146 bool storeCatalog, long* prev, bool skipKeyword);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000147
edisonn@google.comc8fda9d2013-10-09 20:23:12 +0000148 // TODO(edisonn): pdfs with updates not supported right now, generation ignored.
edisonn@google.com571c70b2013-07-10 17:09:50 +0000149 void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
150 static void reset(PublicObjectEntry* obj) {
151 obj->fObj = NULL;
152 obj->fResolvedReference = NULL;
153 obj->fOffset = -1;
edisonn@google.comf68aed32013-08-22 15:37:21 +0000154 obj->fIsReferenceResolved = false;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000155 }
156
edisonn@google.com3aa35552013-08-14 18:26:20 +0000157 SkPdfNativeObject* readObject(int id/*, int generation*/);
edisonn@google.com571c70b2013-07-10 17:09:50 +0000158
159 void fillPages(SkPdfPageTreeNodeDictionary* tree);
160
edisonn@google.com571c70b2013-07-10 17:09:50 +0000161 SkPdfAllocator* fAllocator;
162 SkPdfMapper* fMapper;
edisonn@google.com2ccc3af2013-07-23 17:43:18 +0000163 const unsigned char* fFileContent;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000164 size_t fContentLength;
edisonn@google.com3aa35552013-08-14 18:26:20 +0000165 SkPdfNativeObject* fRootCatalogRef;
edisonn@google.com571c70b2013-07-10 17:09:50 +0000166 SkPdfCatalogDictionary* fRootCatalog;
167
168 mutable SkTDArray<PublicObjectEntry> fObjects;
169 SkTDArray<SkPdfPageObjectDictionary*> fPages;
edisonn@google.com3aac1f92013-07-02 22:42:53 +0000170};
171
edisonn@google.comcf2cfa12013-08-21 16:31:37 +0000172#endif // SkPdfNativeDoc_DEFINED