edisonn@google.com | cf2cfa1 | 2013-08-21 16:31:37 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2013 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkPdfNativeDoc_DEFINED |
| 9 | #define SkPdfNativeDoc_DEFINED |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 10 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 11 | #include "SkRect.h" |
| 12 | #include "SkTDArray.h" |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 13 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 14 | class SkCanvas; |
| 15 | |
| 16 | class SkPdfAllocator; |
| 17 | class SkPdfMapper; |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 18 | class SkPdfNativeObject; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 19 | class SkPdfReal; |
| 20 | class SkPdfInteger; |
| 21 | class SkPdfString; |
| 22 | class SkPdfResourceDictionary; |
| 23 | class SkPdfCatalogDictionary; |
| 24 | class SkPdfPageObjectDictionary; |
| 25 | class SkPdfPageTreeNodeDictionary; |
| 26 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 27 | class SkPdfNativeTokenizer; |
| 28 | |
edisonn@google.com | 147adb1 | 2013-07-24 15:56:19 +0000 | [diff] [blame] | 29 | class SkStream; |
| 30 | |
// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading
// the bytes in order. For example, we can try to read the stream optimistically, but if there
// are issues in the pdf, we must read the pdf from the beginning, and fix whatever errors we can.
// This would be useful to quickly show page 100 of a pdf (www.example.com/foo.pdf#page100).
// But if the pdf is missing the xref, then we will have to read most of the pdf to be able to
// render page 100.
| 37 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 38 | /** \class SkPdfNativeDoc |
| 39 | * |
| 40 | * The SkPdfNativeDoc class is used to load a PDF in memory and it represents a PDF Document. |
| 41 | * |
| 42 | */ |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 43 | class SkPdfNativeDoc { |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 44 | private: |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 45 | // Information about public objects in pdf that can be referenced with ID GEN R |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 46 | struct PublicObjectEntry { |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 47 | // Offset in the file where the object starts. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 48 | long fOffset; |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 49 | |
| 50 | // Offset in file where the object ends. Could be used to quickly fail if there is a |
| 51 | // problem in pdf structure. |
edisonn@google.com | c8fda9d | 2013-10-09 20:23:12 +0000 | [diff] [blame] | 52 | // long endOffset; // TODO(edisonn): determine the end of the object, |
| 53 | // to be used when the doc is corrupted, for fast failure. |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 54 | |
| 55 | // Refered object. |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 56 | SkPdfNativeObject* fObj; |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 57 | |
| 58 | // If refered object is a reference, we resolve recursively the reference until we find |
| 59 | // the real object. |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 60 | SkPdfNativeObject* fResolvedReference; |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 61 | |
| 62 | // Used to break a recursive reference to itself. |
edisonn@google.com | f68aed3 | 2013-08-22 15:37:21 +0000 | [diff] [blame] | 63 | bool fIsReferenceResolved; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 64 | }; |
| 65 | |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 66 | public: |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 67 | // TODO(edisonn) should be deprecated |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 68 | SkPdfNativeDoc(const char* path); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 69 | |
| 70 | // TODO(edisonn) should be deprecated |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 71 | SkPdfNativeDoc(SkStream* stream); |
edisonn@google.com | 147adb1 | 2013-07-24 15:56:19 +0000 | [diff] [blame] | 72 | |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 73 | ~SkPdfNativeDoc(); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 74 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 75 | // returns the number of pages in the pdf |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 76 | int pages() const; |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 77 | |
| 78 | // returns the page resources |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 79 | SkPdfResourceDictionary* pageResources(int page); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 80 | |
| 81 | // returns the page's mediabox i points - the page physical boundaries. |
edisonn@google.com | 951d653 | 2013-07-10 23:17:31 +0000 | [diff] [blame] | 82 | SkRect MediaBox(int page); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 83 | |
| 84 | // Returns a tokenizer of a page. The passed allocator will be used to allocate objects that |
| 85 | // are parsed. It should be destroyed after the tokenizer. |
edisonn@google.com | 2ccc3af | 2013-07-23 17:43:18 +0000 | [diff] [blame] | 86 | SkPdfNativeTokenizer* tokenizerOfPage(int n, SkPdfAllocator* allocator); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 87 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 88 | // Returns a tokenizer of a pdf stream. The passed allocator will be used to allocate objects |
| 89 | // that are parsed. It should be destroyed after the tokenizer. |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 90 | SkPdfNativeTokenizer* tokenizerOfStream(SkPdfNativeObject* stream, SkPdfAllocator* allocator); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 91 | |
| 92 | // Returns a tokenizer of a memory buffer. The passed allocator will be used to allocate objects |
| 93 | // that are parsed. It should be destroyed after the tokenizer. |
edisonn@google.com | 2ccc3af | 2013-07-23 17:43:18 +0000 | [diff] [blame] | 94 | SkPdfNativeTokenizer* tokenizerOfBuffer(const unsigned char* buffer, size_t len, |
| 95 | SkPdfAllocator* allocator); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 96 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 97 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 98 | //returns objects that are references and can be queried. |
| 99 | size_t objects() const; |
| 100 | |
| 101 | // returns an object. |
| 102 | // TODO(edisonn): pdf updates are not supported yet. |
| 103 | // add generation parameter to support page updates. |
| 104 | SkPdfNativeObject* object(int id /*, int generation*/ ); |
| 105 | |
| 106 | // returns the object that holds all the page informnation |
| 107 | // TODO(edisonn): pdf updates are not supported yet. |
| 108 | // add generation parameter to support page updates. |
| 109 | SkPdfPageObjectDictionary* page(int page/*, int generation*/); |
| 110 | |
| 111 | // TODO(edisonn): deprecate the mapper - was used when we supported multiple |
| 112 | // parsers (podofo) |
| 113 | // The mapper maps allows an object to be mapped to a different dictionary type |
| 114 | // and it could verify the integrity of the object. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 115 | const SkPdfMapper* mapper() const; |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 116 | |
| 117 | // Allocator of the pdf - this holds all objects that are publicly referenced |
| 118 | // and all the objects that they refer |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 119 | SkPdfAllocator* allocator() const; |
| 120 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 121 | // Allows a renderer to create values to be dumped on the stack for operators to process them. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 122 | SkPdfReal* createReal(double value) const; |
| 123 | SkPdfInteger* createInteger(int value) const; |
| 124 | // the string does not own the char* |
edisonn@google.com | 2ccc3af | 2013-07-23 17:43:18 +0000 | [diff] [blame] | 125 | SkPdfString* createString(const unsigned char* sz, size_t len) const; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 126 | |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 127 | // Resolve a reference object. Will recursively resolve the reference |
| 128 | // until a real object is found |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 129 | SkPdfNativeObject* resolveReference(SkPdfNativeObject* ref); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 130 | |
edisonn@google.com | a5aaa79 | 2013-07-11 12:27:21 +0000 | [diff] [blame] | 131 | // Reports an approximation of all the memory usage. |
edisonn@google.com | 7b328fd | 2013-07-11 12:53:06 +0000 | [diff] [blame] | 132 | size_t bytesUsed() const; |
edisonn@google.com | a5aaa79 | 2013-07-11 12:27:21 +0000 | [diff] [blame] | 133 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 134 | private: |
| 135 | |
edisonn@google.com | 147adb1 | 2013-07-24 15:56:19 +0000 | [diff] [blame] | 136 | // Takes ownership of bytes. |
| 137 | void init(const void* bytes, size_t length); |
edisonn@google.com | 2af2ad9 | 2013-10-11 16:17:44 +0000 | [diff] [blame] | 138 | |
| 139 | // loads a pdf that has missing xref |
edisonn@google.com | 4ef4bed | 2013-07-29 22:14:45 +0000 | [diff] [blame] | 140 | void loadWithoutXRef(); |
edisonn@google.com | 147adb1 | 2013-07-24 15:56:19 +0000 | [diff] [blame] | 141 | |
edisonn@google.com | c8fda9d | 2013-10-09 20:23:12 +0000 | [diff] [blame] | 142 | const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart, |
| 143 | const unsigned char* trailerEnd); |
| 144 | const unsigned char* readTrailer(const unsigned char* trailerStart, |
| 145 | const unsigned char* trailerEnd, |
| 146 | bool storeCatalog, long* prev, bool skipKeyword); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 147 | |
edisonn@google.com | c8fda9d | 2013-10-09 20:23:12 +0000 | [diff] [blame] | 148 | // TODO(edisonn): pdfs with updates not supported right now, generation ignored. |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 149 | void addCrossSectionInfo(int id, int generation, int offset, bool isFreed); |
| 150 | static void reset(PublicObjectEntry* obj) { |
| 151 | obj->fObj = NULL; |
| 152 | obj->fResolvedReference = NULL; |
| 153 | obj->fOffset = -1; |
edisonn@google.com | f68aed3 | 2013-08-22 15:37:21 +0000 | [diff] [blame] | 154 | obj->fIsReferenceResolved = false; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 155 | } |
| 156 | |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 157 | SkPdfNativeObject* readObject(int id/*, int generation*/); |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 158 | |
| 159 | void fillPages(SkPdfPageTreeNodeDictionary* tree); |
| 160 | |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 161 | SkPdfAllocator* fAllocator; |
| 162 | SkPdfMapper* fMapper; |
edisonn@google.com | 2ccc3af | 2013-07-23 17:43:18 +0000 | [diff] [blame] | 163 | const unsigned char* fFileContent; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 164 | size_t fContentLength; |
edisonn@google.com | 3aa3555 | 2013-08-14 18:26:20 +0000 | [diff] [blame] | 165 | SkPdfNativeObject* fRootCatalogRef; |
edisonn@google.com | 571c70b | 2013-07-10 17:09:50 +0000 | [diff] [blame] | 166 | SkPdfCatalogDictionary* fRootCatalog; |
| 167 | |
| 168 | mutable SkTDArray<PublicObjectEntry> fObjects; |
| 169 | SkTDArray<SkPdfPageObjectDictionary*> fPages; |
edisonn@google.com | 3aac1f9 | 2013-07-02 22:42:53 +0000 | [diff] [blame] | 170 | }; |
| 171 | |
edisonn@google.com | cf2cfa1 | 2013-08-21 16:31:37 +0000 | [diff] [blame] | 172 | #endif // SkPdfNativeDoc_DEFINED |