blob: 02b481c6eaa54f3407dbfd5d8b411390ce6eaff4 [file] [log] [blame]
// Copyright 2015 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
art-snake461b1d92016-10-31 12:25:30 -07005#include <algorithm>
6#include <memory>
7#include <string>
Dan Sinclair85c8e7f2016-11-21 13:50:32 -05008#include <utility>
art-snake461b1d92016-10-31 12:25:30 -07009#include <vector>
10
Artem Strygina3270302018-06-22 12:45:14 +000011#include "core/fxcrt/bytestring.h"
12#include "core/fxcrt/widestring.h"
Lei Zhangb4e7f302015-11-06 15:52:32 -080013#include "public/fpdfview.h"
Wei Li091f7a02015-11-09 12:09:55 -080014#include "testing/embedder_test.h"
Tom Sepez2255a1b2015-01-23 15:33:44 -080015#include "testing/gtest/include/gtest/gtest.h"
Artem Strygin0e60b9e2017-09-28 18:46:03 +030016#include "testing/range_set.h"
Lei Zhangb6992dd2019-02-05 23:30:20 +000017#include "testing/utils/file_util.h"
art-snake461b1d92016-10-31 12:25:30 -070018#include "testing/utils/path_service.h"
19
20namespace {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030021
Tom Sepez55865452018-08-27 20:18:04 +000022class MockDownloadHints final : public FX_DOWNLOADHINTS {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030023 public:
24 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
25 }
26
27 MockDownloadHints() {
28 FX_DOWNLOADHINTS::version = 1;
29 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
30 }
31
Lei Zhang38a188d2019-08-07 18:46:13 +000032 ~MockDownloadHints() = default;
Artem Strygin0e60b9e2017-09-28 18:46:03 +030033};
34
Tom Sepez55865452018-08-27 20:18:04 +000035class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
art-snake461b1d92016-10-31 12:25:30 -070036 public:
Dan Sinclair85c8e7f2016-11-21 13:50:32 -050037 explicit TestAsyncLoader(const std::string& file_name) {
art-snake461b1d92016-10-31 12:25:30 -070038 std::string file_path;
39 if (!PathService::GetTestFilePath(file_name, &file_path))
40 return;
41 file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
42 if (!file_contents_)
43 return;
44
45 file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
46 file_access_.m_GetBlock = SGetBlock;
47 file_access_.m_Param = this;
48
49 FX_DOWNLOADHINTS::version = 1;
50 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
51
52 FX_FILEAVAIL::version = 1;
53 FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
54 }
55
56 bool IsOpened() const { return !!file_contents_; }
57
58 FPDF_FILEACCESS* file_access() { return &file_access_; }
59 FX_DOWNLOADHINTS* hints() { return this; }
60 FX_FILEAVAIL* file_avail() { return this; }
61
62 const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
63 return requested_segments_;
64 }
65
art-snake61f8e9c2016-11-09 21:32:46 -080066 size_t max_requested_bound() const { return max_requested_bound_; }
67
68 void ClearRequestedSegments() {
69 requested_segments_.clear();
70 max_requested_bound_ = 0;
71 }
art-snake461b1d92016-10-31 12:25:30 -070072
73 bool is_new_data_available() const { return is_new_data_available_; }
74 void set_is_new_data_available(bool is_new_data_available) {
75 is_new_data_available_ = is_new_data_available;
76 }
77
art-snake61f8e9c2016-11-09 21:32:46 -080078 size_t max_already_available_bound() const {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030079 return available_ranges_.IsEmpty()
80 ? 0
81 : available_ranges_.ranges().rbegin()->second;
art-snake61f8e9c2016-11-09 21:32:46 -080082 }
83
Artem Strygin0ec10f92017-09-28 17:58:18 +030084 void FlushRequestedData() {
85 for (const auto& it : requested_segments_) {
86 SetDataAvailable(it.first, it.second);
87 }
88 ClearRequestedSegments();
89 }
90
Artem Strygina3270302018-06-22 12:45:14 +000091 char* file_contents() { return file_contents_.get(); }
92 size_t file_length() const { return file_length_; }
93
art-snake461b1d92016-10-31 12:25:30 -070094 private:
95 void SetDataAvailable(size_t start, size_t size) {
Artem Strygin0e60b9e2017-09-28 18:46:03 +030096 available_ranges_.Union(RangeSet::Range(start, start + size));
art-snake461b1d92016-10-31 12:25:30 -070097 }
98
99 bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300100 return available_ranges_.Contains(RangeSet::Range(start, start + size));
art-snake461b1d92016-10-31 12:25:30 -0700101 }
102
103 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
104 if (!IsDataAvailImpl(pos, size))
105 return 0;
106 const unsigned long end =
107 std::min(static_cast<unsigned long>(file_length_), pos + size);
108 if (end <= pos)
109 return 0;
110 memcpy(pBuf, file_contents_.get() + pos, end - pos);
111 SetDataAvailable(pos, end - pos);
112 return static_cast<int>(end - pos);
113 }
114
115 void AddSegmentImpl(size_t offset, size_t size) {
116 requested_segments_.push_back(std::make_pair(offset, size));
art-snake61f8e9c2016-11-09 21:32:46 -0800117 max_requested_bound_ = std::max(max_requested_bound_, offset + size);
art-snake461b1d92016-10-31 12:25:30 -0700118 }
119
120 bool IsDataAvailImpl(size_t offset, size_t size) {
121 if (offset + size > file_length_)
122 return false;
123 if (is_new_data_available_) {
124 SetDataAvailable(offset, size);
125 return true;
126 }
127 return CheckDataAlreadyAvailable(offset, size);
128 }
129
130 static int SGetBlock(void* param,
131 unsigned long pos,
132 unsigned char* pBuf,
133 unsigned long size) {
134 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
135 }
136
137 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
138 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
139 }
140
141 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
142 size_t offset,
143 size_t size) {
144 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
145 }
146
147 FPDF_FILEACCESS file_access_;
148
149 std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
Lei Zhang38a188d2019-08-07 18:46:13 +0000150 size_t file_length_ = 0;
art-snake461b1d92016-10-31 12:25:30 -0700151 std::vector<std::pair<size_t, size_t>> requested_segments_;
art-snake61f8e9c2016-11-09 21:32:46 -0800152 size_t max_requested_bound_ = 0;
art-snake461b1d92016-10-31 12:25:30 -0700153 bool is_new_data_available_ = true;
154
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300155 RangeSet available_ranges_;
art-snake461b1d92016-10-31 12:25:30 -0700156};
157
158} // namespace
Tom Sepez2255a1b2015-01-23 15:33:44 -0800159
Lei Zhangab41f252018-12-23 03:10:50 +0000160class FPDFDataAvailEmbedderTest : public EmbedderTest {};
Tom Sepez2255a1b2015-01-23 15:33:44 -0800161
Lei Zhangab41f252018-12-23 03:10:50 +0000162TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
Dan Sinclair6be2aab2015-10-28 13:58:49 -0400163 // Document must load without crashing but is too malformed to be available.
Wei Li091f7a02015-11-09 12:09:55 -0800164 EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300165 MockDownloadHints hints;
166 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints));
Tom Sepez2255a1b2015-01-23 15:33:44 -0800167}
168
Lei Zhangab41f252018-12-23 03:10:50 +0000169TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
Dan Sinclair6be2aab2015-10-28 13:58:49 -0400170 // Document must load without crashing but is too malformed to be available.
Wei Li091f7a02015-11-09 12:09:55 -0800171 EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
Artem Strygin0e60b9e2017-09-28 18:46:03 +0300172 MockDownloadHints hints;
173 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints));
Tom Sepez2255a1b2015-01-23 15:33:44 -0800174}
art-snake461b1d92016-10-31 12:25:30 -0700175
Lei Zhangab41f252018-12-23 03:10:50 +0000176TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
art-snake461b1d92016-10-31 12:25:30 -0700177 TestAsyncLoader loader("feature_linearized_loading.pdf");
178 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
179 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
180 document_ = FPDFAvail_GetDocument(avail_, nullptr);
181 ASSERT_TRUE(document_);
182 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
183
184 // No new data available, to prevent load "Pages" node.
185 loader.set_is_new_data_available(false);
Lei Zhang38a188d2019-08-07 18:46:13 +0000186 ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
art-snake461b1d92016-10-31 12:25:30 -0700187 EXPECT_TRUE(page);
art-snake461b1d92016-10-31 12:25:30 -0700188}
art-snake61f8e9c2016-11-09 21:32:46 -0800189
Lei Zhangab41f252018-12-23 03:10:50 +0000190TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
Artem Strygin0ec10f92017-09-28 17:58:18 +0300191 TestAsyncLoader loader("feature_linearized_loading.pdf");
192 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
193 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
194 document_ = FPDFAvail_GetDocument(avail_, nullptr);
195 ASSERT_TRUE(document_);
196
Lei Zhang38a188d2019-08-07 18:46:13 +0000197 // Prevent access to non-requested data to coerce the parser to send new
198 // request for non available (non-requested before) data.
Artem Strygin0ec10f92017-09-28 17:58:18 +0300199 loader.set_is_new_data_available(false);
200 loader.ClearRequestedSegments();
201
202 int status = PDF_FORM_NOTAVAIL;
203 while (status == PDF_FORM_NOTAVAIL) {
204 loader.FlushRequestedData();
205 status = FPDFAvail_IsFormAvail(avail_, loader.hints());
206 }
207 EXPECT_NE(PDF_FORM_ERROR, status);
208}
209
Lei Zhangab41f252018-12-23 03:10:50 +0000210TEST_F(FPDFDataAvailEmbedderTest,
art-snake61f8e9c2016-11-09 21:32:46 -0800211 DoNotLoadMainCrossRefForFirstPageIfLinearized) {
212 TestAsyncLoader loader("feature_linearized_loading.pdf");
213 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
214 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
215 document_ = FPDFAvail_GetDocument(avail_, nullptr);
216 ASSERT_TRUE(document_);
217 const int first_page_num = FPDFAvail_GetFirstPageNum(document_);
218
219 // The main cross ref table should not be processed.
220 // (It is always at file end)
221 EXPECT_GT(loader.file_access()->m_FileLen,
222 loader.max_already_available_bound());
223
Lei Zhang38a188d2019-08-07 18:46:13 +0000224 // Prevent access to non-requested data to coerce the parser to send new
225 // request for non available (non-requested before) data.
art-snake61f8e9c2016-11-09 21:32:46 -0800226 loader.set_is_new_data_available(false);
227 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());
228
229 // The main cross ref table should not be requested.
230 // (It is always at file end)
231 EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());
232
233 // Allow parse page.
234 loader.set_is_new_data_available(true);
235 ASSERT_EQ(PDF_DATA_AVAIL,
236 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));
237
238 // The main cross ref table should not be processed.
239 // (It is always at file end)
240 EXPECT_GT(loader.file_access()->m_FileLen,
241 loader.max_already_available_bound());
242
243 // Prevent loading data, while page loading.
244 loader.set_is_new_data_available(false);
Lei Zhang38a188d2019-08-07 18:46:13 +0000245 ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
art-snake61f8e9c2016-11-09 21:32:46 -0800246 EXPECT_TRUE(page);
art-snake61f8e9c2016-11-09 21:32:46 -0800247}
Artem Strygin94df3672017-09-28 18:14:15 +0300248
Lei Zhangab41f252018-12-23 03:10:50 +0000249TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
Artem Strygin94df3672017-09-28 18:14:15 +0300250 TestAsyncLoader loader("feature_linearized_loading.pdf");
251 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
252 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
253 document_ = FPDFAvail_GetDocument(avail_, nullptr);
254 ASSERT_TRUE(document_);
255
256 static constexpr uint32_t kSecondPageNum = 1;
257
Lei Zhang38a188d2019-08-07 18:46:13 +0000258 // Prevent access to non-requested data to coerce the parser to send new
259 // request for non available (non-requested before) data.
Artem Strygin94df3672017-09-28 18:14:15 +0300260 loader.set_is_new_data_available(false);
261 loader.ClearRequestedSegments();
262
263 int status = PDF_DATA_NOTAVAIL;
264 while (status == PDF_DATA_NOTAVAIL) {
265 loader.FlushRequestedData();
266 status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints());
267 }
268 EXPECT_EQ(PDF_DATA_AVAIL, status);
269
270 // Prevent loading data, while page loading.
271 loader.set_is_new_data_available(false);
Lei Zhang38a188d2019-08-07 18:46:13 +0000272 ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
Artem Strygin94df3672017-09-28 18:14:15 +0300273 EXPECT_TRUE(page);
Artem Strygin94df3672017-09-28 18:14:15 +0300274}
Artem Strygina3270302018-06-22 12:45:14 +0000275
Lei Zhangab41f252018-12-23 03:10:50 +0000276TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
Artem Strygina3270302018-06-22 12:45:14 +0000277 TestAsyncLoader loader("linearized.pdf");
278 loader.set_is_new_data_available(false);
279 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
280 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
281 loader.FlushRequestedData();
282 }
283
284 document_ = FPDFAvail_GetDocument(avail_, nullptr);
285 ASSERT_TRUE(document_);
286
287 // The "info" dictionary should still be unavailable.
288 EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
289
290 // Simulate receiving whole file.
291 loader.set_is_new_data_available(true);
292 // Load second page, to parse additional crossref sections.
293 EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
294
295 EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
296}
297
Lei Zhangab41f252018-12-23 03:10:50 +0000298TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
Artem Strygina3270302018-06-22 12:45:14 +0000299 TestAsyncLoader loader("linearized.pdf");
300 // Map "Info" to an object within the first section without breaking
301 // linearization.
302 ByteString data(loader.file_contents(), loader.file_length());
303 Optional<size_t> index = data.Find("/Info 27 0 R");
304 ASSERT_TRUE(index);
305 memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12);
306
307 loader.set_is_new_data_available(false);
308 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
309 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
310 loader.FlushRequestedData();
311 }
312
313 document_ = FPDFAvail_GetDocument(avail_, nullptr);
314 ASSERT_TRUE(document_);
315
316 // The "Info" dictionary should be available for the linearized document, if
317 // it is located in the first page section.
318 // Info was remapped to a dictionary with Type "Catalog"
319 unsigned short buffer[100] = {0};
320 EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
321 constexpr wchar_t kExpectedValue[] = L"Catalog";
322 EXPECT_EQ(WideString(kExpectedValue),
323 WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
324}
325
Lei Zhangab41f252018-12-23 03:10:50 +0000326TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
Artem Strygina3270302018-06-22 12:45:14 +0000327 TestAsyncLoader loader("linearized.pdf");
328 // Map "Info" to an invalid object without breaking linearization.
329 ByteString data(loader.file_contents(), loader.file_length());
330 Optional<size_t> index = data.Find("/Info 27 0 R");
331 ASSERT_TRUE(index);
332 memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12);
333
334 loader.set_is_new_data_available(false);
335 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
336 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
337 loader.FlushRequestedData();
338 }
339
340 document_ = FPDFAvail_GetDocument(avail_, nullptr);
341 ASSERT_TRUE(document_);
342
343 // Set all data available.
344 loader.set_is_new_data_available(true);
345 // Check second page, to load additional crossrefs.
346 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
347
348 // Test that api is robust enough to handle the bad case.
349 EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
350}
351
Lei Zhangab41f252018-12-23 03:10:50 +0000352TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
Artem Strygina3270302018-06-22 12:45:14 +0000353 TestAsyncLoader loader("linearized.pdf");
354 // Break the "Info" parameter without breaking linearization.
355 ByteString data(loader.file_contents(), loader.file_length());
356 Optional<size_t> index = data.Find("/Info 27 0 R");
357 ASSERT_TRUE(index);
358 memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12);
359
360 loader.set_is_new_data_available(false);
361 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
362 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
363 loader.FlushRequestedData();
364 }
365
366 document_ = FPDFAvail_GetDocument(avail_, nullptr);
367 ASSERT_TRUE(document_);
368
369 // Set all data available.
370 loader.set_is_new_data_available(true);
371 // Check second page, to load additional crossrefs.
372 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
373
374 // Test that api is robust enough to handle the bad case.
375 EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
376}
Lei Zhangd894b9a2019-08-07 18:42:44 +0000377
378TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
379 EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr));
380 EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr));
381 EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));
382 EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr));
383 EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr));
384 EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr));
385}
386
387TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
388 TestAsyncLoader loader("linearized.pdf");
389 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
390 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
391 EXPECT_EQ(PDF_DATA_NOTAVAIL,
392 FPDFAvail_IsPageAvail(avail_, -1, loader.hints()));
393}