blob: 08bc6c0450073659ff3d755e78a18802eda7feea [file] [log] [blame]
// Copyright 2015 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
art-snake461b1d92016-10-31 12:25:30 -07005#include <algorithm>
6#include <memory>
Dan Sinclair85c8e7f2016-11-21 13:50:32 -05007#include <set>
art-snake461b1d92016-10-31 12:25:30 -07008#include <string>
Dan Sinclair85c8e7f2016-11-21 13:50:32 -05009#include <utility>
art-snake461b1d92016-10-31 12:25:30 -070010#include <vector>
11
Artem Strygina3270302018-06-22 12:45:14 +000012#include "core/fxcrt/bytestring.h"
13#include "core/fxcrt/widestring.h"
Lei Zhangb4e7f302015-11-06 15:52:32 -080014#include "public/fpdfview.h"
Wei Li091f7a02015-11-09 12:09:55 -080015#include "testing/embedder_test.h"
Tom Sepez2255a1b2015-01-23 15:33:44 -080016#include "testing/gtest/include/gtest/gtest.h"
Artem Strygin0e60b9e2017-09-28 18:46:03 +030017#include "testing/range_set.h"
art-snake461b1d92016-10-31 12:25:30 -070018#include "testing/test_support.h"
19#include "testing/utils/path_service.h"
20
21namespace {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030022
Tom Sepez55865452018-08-27 20:18:04 +000023class MockDownloadHints final : public FX_DOWNLOADHINTS {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030024 public:
25 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
26 }
27
28 MockDownloadHints() {
29 FX_DOWNLOADHINTS::version = 1;
30 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
31 }
32
33 ~MockDownloadHints() {}
34};
35
Tom Sepez55865452018-08-27 20:18:04 +000036class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
art-snake461b1d92016-10-31 12:25:30 -070037 public:
Dan Sinclair85c8e7f2016-11-21 13:50:32 -050038 explicit TestAsyncLoader(const std::string& file_name) {
art-snake461b1d92016-10-31 12:25:30 -070039 std::string file_path;
40 if (!PathService::GetTestFilePath(file_name, &file_path))
41 return;
42 file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
43 if (!file_contents_)
44 return;
45
46 file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
47 file_access_.m_GetBlock = SGetBlock;
48 file_access_.m_Param = this;
49
50 FX_DOWNLOADHINTS::version = 1;
51 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
52
53 FX_FILEAVAIL::version = 1;
54 FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
55 }
56
57 bool IsOpened() const { return !!file_contents_; }
58
59 FPDF_FILEACCESS* file_access() { return &file_access_; }
60 FX_DOWNLOADHINTS* hints() { return this; }
61 FX_FILEAVAIL* file_avail() { return this; }
62
63 const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
64 return requested_segments_;
65 }
66
art-snake61f8e9c2016-11-09 21:32:46 -080067 size_t max_requested_bound() const { return max_requested_bound_; }
68
69 void ClearRequestedSegments() {
70 requested_segments_.clear();
71 max_requested_bound_ = 0;
72 }
art-snake461b1d92016-10-31 12:25:30 -070073
74 bool is_new_data_available() const { return is_new_data_available_; }
75 void set_is_new_data_available(bool is_new_data_available) {
76 is_new_data_available_ = is_new_data_available;
77 }
78
art-snake61f8e9c2016-11-09 21:32:46 -080079 size_t max_already_available_bound() const {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030080 return available_ranges_.IsEmpty()
81 ? 0
82 : available_ranges_.ranges().rbegin()->second;
art-snake61f8e9c2016-11-09 21:32:46 -080083 }
84
Artem Strygin0ec10f92017-09-28 17:58:18 +030085 void FlushRequestedData() {
86 for (const auto& it : requested_segments_) {
87 SetDataAvailable(it.first, it.second);
88 }
89 ClearRequestedSegments();
90 }
91
Artem Strygina3270302018-06-22 12:45:14 +000092 char* file_contents() { return file_contents_.get(); }
93 size_t file_length() const { return file_length_; }
94
art-snake461b1d92016-10-31 12:25:30 -070095 private:
96 void SetDataAvailable(size_t start, size_t size) {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030097 available_ranges_.Union(RangeSet::Range(start, start + size));
art-snake461b1d92016-10-31 12:25:30 -070098 }
99
100 bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300101 return available_ranges_.Contains(RangeSet::Range(start, start + size));
art-snake461b1d92016-10-31 12:25:30 -0700102 }
103
104 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
105 if (!IsDataAvailImpl(pos, size))
106 return 0;
107 const unsigned long end =
108 std::min(static_cast<unsigned long>(file_length_), pos + size);
109 if (end <= pos)
110 return 0;
111 memcpy(pBuf, file_contents_.get() + pos, end - pos);
112 SetDataAvailable(pos, end - pos);
113 return static_cast<int>(end - pos);
114 }
115
116 void AddSegmentImpl(size_t offset, size_t size) {
117 requested_segments_.push_back(std::make_pair(offset, size));
art-snake61f8e9c2016-11-09 21:32:46 -0800118 max_requested_bound_ = std::max(max_requested_bound_, offset + size);
art-snake461b1d92016-10-31 12:25:30 -0700119 }
120
121 bool IsDataAvailImpl(size_t offset, size_t size) {
122 if (offset + size > file_length_)
123 return false;
124 if (is_new_data_available_) {
125 SetDataAvailable(offset, size);
126 return true;
127 }
128 return CheckDataAlreadyAvailable(offset, size);
129 }
130
131 static int SGetBlock(void* param,
132 unsigned long pos,
133 unsigned char* pBuf,
134 unsigned long size) {
135 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
136 }
137
138 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
139 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
140 }
141
142 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
143 size_t offset,
144 size_t size) {
145 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
146 }
147
148 FPDF_FILEACCESS file_access_;
149
150 std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
151 size_t file_length_;
152 std::vector<std::pair<size_t, size_t>> requested_segments_;
art-snake61f8e9c2016-11-09 21:32:46 -0800153 size_t max_requested_bound_ = 0;
art-snake461b1d92016-10-31 12:25:30 -0700154 bool is_new_data_available_ = true;
155
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300156 RangeSet available_ranges_;
art-snake461b1d92016-10-31 12:25:30 -0700157};
158
159} // namespace
Tom Sepez2255a1b2015-01-23 15:33:44 -0800160
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700161class FPDFDataAvailEmbeddertest : public EmbedderTest {};
Tom Sepez2255a1b2015-01-23 15:33:44 -0800162
163TEST_F(FPDFDataAvailEmbeddertest, TrailerUnterminated) {
Dan Sinclair6be2aab2015-10-28 13:58:49 -0400164 // Document must load without crashing but is too malformed to be available.
Wei Li091f7a02015-11-09 12:09:55 -0800165 EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300166 MockDownloadHints hints;
167 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints));
Tom Sepez2255a1b2015-01-23 15:33:44 -0800168}
169
170TEST_F(FPDFDataAvailEmbeddertest, TrailerAsHexstring) {
Dan Sinclair6be2aab2015-10-28 13:58:49 -0400171 // Document must load without crashing but is too malformed to be available.
Wei Li091f7a02015-11-09 12:09:55 -0800172 EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300173 MockDownloadHints hints;
174 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints));
Tom Sepez2255a1b2015-01-23 15:33:44 -0800175}
art-snake461b1d92016-10-31 12:25:30 -0700176
177TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) {
178 TestAsyncLoader loader("feature_linearized_loading.pdf");
179 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
180 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
181 document_ = FPDFAvail_GetDocument(avail_, nullptr);
182 ASSERT_TRUE(document_);
183 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
184
185 // No new data available, to prevent load "Pages" node.
186 loader.set_is_new_data_available(false);
Nicolas Pena3ff54002017-07-05 11:55:35 -0400187 FPDF_PAGE page = FPDF_LoadPage(document(), 1);
art-snake461b1d92016-10-31 12:25:30 -0700188 EXPECT_TRUE(page);
Nicolas Pena3ff54002017-07-05 11:55:35 -0400189 FPDF_ClosePage(page);
art-snake461b1d92016-10-31 12:25:30 -0700190}
art-snake61f8e9c2016-11-09 21:32:46 -0800191
Artem Strygin0ec10f92017-09-28 17:58:18 +0300192TEST_F(FPDFDataAvailEmbeddertest, CheckFormAvailIfLinearized) {
193 TestAsyncLoader loader("feature_linearized_loading.pdf");
194 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
195 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
196 document_ = FPDFAvail_GetDocument(avail_, nullptr);
197 ASSERT_TRUE(document_);
198
199 // Prevent access to non requested data to coerce the parser to send new
200 // request for non available (non requested before) data.
201 loader.set_is_new_data_available(false);
202 loader.ClearRequestedSegments();
203
204 int status = PDF_FORM_NOTAVAIL;
205 while (status == PDF_FORM_NOTAVAIL) {
206 loader.FlushRequestedData();
207 status = FPDFAvail_IsFormAvail(avail_, loader.hints());
208 }
209 EXPECT_NE(PDF_FORM_ERROR, status);
210}
211
art-snake61f8e9c2016-11-09 21:32:46 -0800212TEST_F(FPDFDataAvailEmbeddertest,
213 DoNotLoadMainCrossRefForFirstPageIfLinearized) {
214 TestAsyncLoader loader("feature_linearized_loading.pdf");
215 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
216 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
217 document_ = FPDFAvail_GetDocument(avail_, nullptr);
218 ASSERT_TRUE(document_);
219 const int first_page_num = FPDFAvail_GetFirstPageNum(document_);
220
221 // The main cross ref table should not be processed.
222 // (It is always at file end)
223 EXPECT_GT(loader.file_access()->m_FileLen,
224 loader.max_already_available_bound());
225
226 // Prevent access to non requested data to coerce the parser to send new
227 // request for non available (non requested before) data.
228 loader.set_is_new_data_available(false);
229 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());
230
231 // The main cross ref table should not be requested.
232 // (It is always at file end)
233 EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());
234
235 // Allow parse page.
236 loader.set_is_new_data_available(true);
237 ASSERT_EQ(PDF_DATA_AVAIL,
238 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));
239
240 // The main cross ref table should not be processed.
241 // (It is always at file end)
242 EXPECT_GT(loader.file_access()->m_FileLen,
243 loader.max_already_available_bound());
244
245 // Prevent loading data, while page loading.
246 loader.set_is_new_data_available(false);
Nicolas Pena3ff54002017-07-05 11:55:35 -0400247 FPDF_PAGE page = FPDF_LoadPage(document(), first_page_num);
art-snake61f8e9c2016-11-09 21:32:46 -0800248 EXPECT_TRUE(page);
Nicolas Pena3ff54002017-07-05 11:55:35 -0400249 FPDF_ClosePage(page);
art-snake61f8e9c2016-11-09 21:32:46 -0800250}
Artem Strygin94df3672017-09-28 18:14:15 +0300251
252TEST_F(FPDFDataAvailEmbeddertest, LoadSecondPageIfLinearizedWithHints) {
253 TestAsyncLoader loader("feature_linearized_loading.pdf");
254 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
255 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
256 document_ = FPDFAvail_GetDocument(avail_, nullptr);
257 ASSERT_TRUE(document_);
258
259 static constexpr uint32_t kSecondPageNum = 1;
260
261 // Prevent access to non requested data to coerce the parser to send new
262 // request for non available (non requested before) data.
263 loader.set_is_new_data_available(false);
264 loader.ClearRequestedSegments();
265
266 int status = PDF_DATA_NOTAVAIL;
267 while (status == PDF_DATA_NOTAVAIL) {
268 loader.FlushRequestedData();
269 status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints());
270 }
271 EXPECT_EQ(PDF_DATA_AVAIL, status);
272
273 // Prevent loading data, while page loading.
274 loader.set_is_new_data_available(false);
275 FPDF_PAGE page = FPDF_LoadPage(document(), kSecondPageNum);
276 EXPECT_TRUE(page);
277 FPDF_ClosePage(page);
278}
Artem Strygina3270302018-06-22 12:45:14 +0000279
280TEST_F(FPDFDataAvailEmbeddertest, LoadInfoAfterReceivingWholeDocument) {
281 TestAsyncLoader loader("linearized.pdf");
282 loader.set_is_new_data_available(false);
283 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
284 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
285 loader.FlushRequestedData();
286 }
287
288 document_ = FPDFAvail_GetDocument(avail_, nullptr);
289 ASSERT_TRUE(document_);
290
291 // The "info" dictionary should still be unavailable.
292 EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
293
294 // Simulate receiving whole file.
295 loader.set_is_new_data_available(true);
296 // Load second page, to parse additional crossref sections.
297 EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
298
299 EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
300}
301
302TEST_F(FPDFDataAvailEmbeddertest, LoadInfoAfterReceivingFirstPage) {
303 TestAsyncLoader loader("linearized.pdf");
304 // Map "Info" to an object within the first section without breaking
305 // linearization.
306 ByteString data(loader.file_contents(), loader.file_length());
307 Optional<size_t> index = data.Find("/Info 27 0 R");
308 ASSERT_TRUE(index);
309 memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12);
310
311 loader.set_is_new_data_available(false);
312 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
313 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
314 loader.FlushRequestedData();
315 }
316
317 document_ = FPDFAvail_GetDocument(avail_, nullptr);
318 ASSERT_TRUE(document_);
319
320 // The "Info" dictionary should be available for the linearized document, if
321 // it is located in the first page section.
322 // Info was remapped to a dictionary with Type "Catalog"
323 unsigned short buffer[100] = {0};
324 EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
325 constexpr wchar_t kExpectedValue[] = L"Catalog";
326 EXPECT_EQ(WideString(kExpectedValue),
327 WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
328}
329
330TEST_F(FPDFDataAvailEmbeddertest, TryLoadInvalidInfo) {
331 TestAsyncLoader loader("linearized.pdf");
332 // Map "Info" to an invalid object without breaking linearization.
333 ByteString data(loader.file_contents(), loader.file_length());
334 Optional<size_t> index = data.Find("/Info 27 0 R");
335 ASSERT_TRUE(index);
336 memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12);
337
338 loader.set_is_new_data_available(false);
339 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
340 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
341 loader.FlushRequestedData();
342 }
343
344 document_ = FPDFAvail_GetDocument(avail_, nullptr);
345 ASSERT_TRUE(document_);
346
347 // Set all data available.
348 loader.set_is_new_data_available(true);
349 // Check second page, to load additional crossrefs.
350 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
351
352 // Test that api is robust enough to handle the bad case.
353 EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
354}
355
356TEST_F(FPDFDataAvailEmbeddertest, TryLoadNonExistsInfo) {
357 TestAsyncLoader loader("linearized.pdf");
358 // Break the "Info" parameter without breaking linearization.
359 ByteString data(loader.file_contents(), loader.file_length());
360 Optional<size_t> index = data.Find("/Info 27 0 R");
361 ASSERT_TRUE(index);
362 memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12);
363
364 loader.set_is_new_data_available(false);
365 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
366 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
367 loader.FlushRequestedData();
368 }
369
370 document_ = FPDFAvail_GetDocument(avail_, nullptr);
371 ASSERT_TRUE(document_);
372
373 // Set all data available.
374 loader.set_is_new_data_available(true);
375 // Check second page, to load additional crossrefs.
376 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
377
378 // Test that api is robust enough to handle the bad case.
379 EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
380}