blob: 1536c8679bea95d1f0e55541cd0eaabcbf73e544 [file] [log] [blame]
// Copyright 2015 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Dan Sinclair85c8e7f2016-11-21 13:50:32 -05005#include <memory>
6
Dan Sinclairbcd1e702017-08-31 13:19:18 -04007#include "core/fxcrt/fx_memory.h"
Lei Zhangb4e7f302015-11-06 15:52:32 -08008#include "public/fpdf_text.h"
9#include "public/fpdfview.h"
Wei Li091f7a02015-11-09 12:09:55 -080010#include "testing/embedder_test.h"
Tom Sepez26b8a5b2015-01-27 12:42:36 -080011#include "testing/gtest/include/gtest/gtest.h"
Dan Sinclair61046b92016-02-18 14:48:48 -050012#include "testing/test_support.h"
Tom Sepez26b8a5b2015-01-27 12:42:36 -080013
Tom Sepez526f6d52015-01-28 15:49:13 -080014namespace {
15
// Compares the first |length| elements of |actual| against the narrow string
// |expected|, one element per byte of |expected|.
//
// |length| may include the terminating NUL of |expected|, i.e. it may be at
// most strlen(expected) + 1. Anything longer would read past the end of
// |expected| and is rejected.
//
// Returns true only when every compared position matches.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  if (length > strlen(expected) + 1)
    return false;

  for (size_t i = 0; i < length; ++i) {
    // Widen through unsigned char: where plain char is signed, a byte >= 0x80
    // would otherwise sign-extend to 0xFFxx and could never equal |actual[i]|.
    const unsigned short wanted =
        static_cast<unsigned short>(static_cast<unsigned char>(expected[i]));
    if (actual[i] != wanted)
      return false;
  }
  return true;
}
28
29} // namespace
30
Nico Weber9d8ec5a2015-08-04 13:00:21 -070031class FPDFTextEmbeddertest : public EmbedderTest {};
Tom Sepez26b8a5b2015-01-27 12:42:36 -080032
Tom Sepez526f6d52015-01-28 15:49:13 -080033TEST_F(FPDFTextEmbeddertest, Text) {
Wei Li091f7a02015-11-09 12:09:55 -080034 EXPECT_TRUE(OpenDocument("hello_world.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -080035 FPDF_PAGE page = LoadPage(0);
thestig4997b222016-06-07 10:46:22 -070036 EXPECT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -080037
38 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
thestig4997b222016-06-07 10:46:22 -070039 EXPECT_TRUE(textpage);
Tom Sepez526f6d52015-01-28 15:49:13 -080040
Lei Zhangd27acae2015-05-15 15:36:02 -070041 static const char expected[] = "Hello, world!\r\nGoodbye, world!";
Tom Sepez526f6d52015-01-28 15:49:13 -080042 unsigned short fixed_buffer[128];
43 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
44
Ryan Harrisonc5ac0572017-08-31 16:37:48 -040045 // Check that unreasonable inputs are handled gracefully
46 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr));
47 EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, fixed_buffer));
48 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 0, fixed_buffer));
49 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, fixed_buffer));
50
Tom Sepez526f6d52015-01-28 15:49:13 -080051 // Check includes the terminating NUL that is provided.
Lei Zhanga0f67242015-08-17 15:39:30 -070052 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer);
53 ASSERT_GE(num_chars, 0);
Oliver Chang35e68a52015-12-09 12:44:33 -080054 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars));
55 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));
Tom Sepez526f6d52015-01-28 15:49:13 -080056
57 // Count does not include the terminating NUL in the string literal.
Wei Li05d53f02016-03-29 16:42:53 -070058 EXPECT_EQ(sizeof(expected) - 1,
59 static_cast<size_t>(FPDFText_CountChars(textpage)));
Tom Sepez526f6d52015-01-28 15:49:13 -080060 for (size_t i = 0; i < sizeof(expected) - 1; ++i) {
Lei Zhanga0f67242015-08-17 15:39:30 -070061 EXPECT_EQ(static_cast<unsigned int>(expected[i]),
62 FPDFText_GetUnicode(textpage, i))
63 << " at " << i;
Tom Sepez526f6d52015-01-28 15:49:13 -080064 }
65
Ryan Harrison2bf05a62017-09-05 11:48:55 -040066 // Extracting using a buffer that will be completely filled. Small buffer is
67 // 12 elements long, since it will need 2 locations per displayed character in
68 // the expected string, plus 2 more for the terminating character.
69 static const char small_expected[] = "Hello";
70 unsigned short small_buffer[12];
71 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
72 EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 6, small_buffer));
73 EXPECT_TRUE(check_unsigned_shorts(small_expected, small_buffer,
74 sizeof(small_expected)));
75
Tom Sepez526f6d52015-01-28 15:49:13 -080076 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
77 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
78
79 double left = 0.0;
80 double right = 0.0;
81 double bottom = 0.0;
82 double top = 0.0;
83 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top);
84 EXPECT_NEAR(41.071, left, 0.001);
85 EXPECT_NEAR(46.243, right, 0.001);
86 EXPECT_NEAR(49.844, bottom, 0.001);
87 EXPECT_NEAR(55.520, top, 0.001);
88
Andrew Weintraubd3002342017-08-11 11:36:51 -040089 double x = 0.0;
90 double y = 0.0;
91 EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 4, &x, &y));
92 EXPECT_NEAR(40.664, x, 0.001);
93 EXPECT_NEAR(50.000, y, 0.001);
94
Nico Weber9d8ec5a2015-08-04 13:00:21 -070095 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0));
96 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0));
97 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0));
Tom Sepez526f6d52015-01-28 15:49:13 -080098
99 // Test out of range indicies.
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700100 EXPECT_EQ(-1,
101 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0));
102 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800103
104 // Count does not include the terminating NUL in the string literal.
105 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1));
106
107 left = 0.0;
108 right = 0.0;
109 bottom = 0.0;
110 top = 0.0;
111 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom);
112 EXPECT_NEAR(20.847, left, 0.001);
113 EXPECT_NEAR(135.167, right, 0.001);
114 EXPECT_NEAR(96.655, bottom, 0.001);
115 EXPECT_NEAR(116.000, top, 0.001);
116
117 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0).
118 left = -1.0;
119 right = -1.0;
120 bottom = -1.0;
121 top = -1.0;
122 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom);
123 EXPECT_EQ(0.0, left);
124 EXPECT_EQ(0.0, right);
125 EXPECT_EQ(0.0, bottom);
126 EXPECT_EQ(0.0, top);
127
128 left = -2.0;
129 right = -2.0;
130 bottom = -2.0;
131 top = -2.0;
132 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom);
133 EXPECT_EQ(0.0, left);
134 EXPECT_EQ(0.0, right);
135 EXPECT_EQ(0.0, bottom);
136 EXPECT_EQ(0.0, top);
137
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700138 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800139
140 // Extract starting at character 4 as above.
141 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700142 EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
143 fixed_buffer, 1));
Tom Sepez526f6d52015-01-28 15:49:13 -0800144 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1));
145 EXPECT_EQ(0xbdbd, fixed_buffer[1]);
146
147 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700148 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
149 fixed_buffer, 9));
Tom Sepez526f6d52015-01-28 15:49:13 -0800150 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
151 EXPECT_EQ(0xbdbd, fixed_buffer[9]);
152
153 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
Oliver Chang35e68a52015-12-09 12:44:33 -0800154 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
155 fixed_buffer, 128));
Tom Sepez526f6d52015-01-28 15:49:13 -0800156 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
Oliver Chang35e68a52015-12-09 12:44:33 -0800157 EXPECT_EQ(0u, fixed_buffer[9]);
158 EXPECT_EQ(0xbdbd, fixed_buffer[10]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800159
160 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700161 UnloadPage(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800162}
163
164TEST_F(FPDFTextEmbeddertest, TextSearch) {
Wei Li091f7a02015-11-09 12:09:55 -0800165 EXPECT_TRUE(OpenDocument("hello_world.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -0800166 FPDF_PAGE page = LoadPage(0);
thestig4997b222016-06-07 10:46:22 -0700167 EXPECT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800168
169 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
thestig4997b222016-06-07 10:46:22 -0700170 EXPECT_TRUE(textpage);
Tom Sepez526f6d52015-01-28 15:49:13 -0800171
Tom Sepez0aa35312016-01-06 10:16:32 -0800172 std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope =
173 GetFPDFWideString(L"nope");
174 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world =
175 GetFPDFWideString(L"world");
176 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps =
177 GetFPDFWideString(L"WORLD");
178 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr =
179 GetFPDFWideString(L"orld");
Tom Sepez526f6d52015-01-28 15:49:13 -0800180
181 // No occurences of "nope" in test page.
Tom Sepez0aa35312016-01-06 10:16:32 -0800182 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0);
thestig4997b222016-06-07 10:46:22 -0700183 EXPECT_TRUE(search);
Tom Sepez526f6d52015-01-28 15:49:13 -0800184 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
185 EXPECT_EQ(0, FPDFText_GetSchCount(search));
186
187 // Advancing finds nothing.
188 EXPECT_FALSE(FPDFText_FindNext(search));
189 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
190 EXPECT_EQ(0, FPDFText_GetSchCount(search));
191
192 // Retreating finds nothing.
193 EXPECT_FALSE(FPDFText_FindPrev(search));
194 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
195 EXPECT_EQ(0, FPDFText_GetSchCount(search));
196 FPDFText_FindClose(search);
197
198 // Two occurences of "world" in test page.
Tom Sepez0aa35312016-01-06 10:16:32 -0800199 search = FPDFText_FindStart(textpage, world.get(), 0, 2);
thestig4997b222016-06-07 10:46:22 -0700200 EXPECT_TRUE(search);
Tom Sepez526f6d52015-01-28 15:49:13 -0800201
202 // Remains not found until advanced.
203 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
204 EXPECT_EQ(0, FPDFText_GetSchCount(search));
205
206 // First occurence of "world" in this test page.
207 EXPECT_TRUE(FPDFText_FindNext(search));
208 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
209 EXPECT_EQ(5, FPDFText_GetSchCount(search));
210
211 // Last occurence of "world" in this test page.
212 EXPECT_TRUE(FPDFText_FindNext(search));
213 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
214 EXPECT_EQ(5, FPDFText_GetSchCount(search));
215
216 // Found position unchanged when fails to advance.
217 EXPECT_FALSE(FPDFText_FindNext(search));
218 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
219 EXPECT_EQ(5, FPDFText_GetSchCount(search));
220
221 // Back to first occurence.
222 EXPECT_TRUE(FPDFText_FindPrev(search));
223 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
224 EXPECT_EQ(5, FPDFText_GetSchCount(search));
225
226 // Found position unchanged when fails to retreat.
227 EXPECT_FALSE(FPDFText_FindPrev(search));
228 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
229 EXPECT_EQ(5, FPDFText_GetSchCount(search));
230 FPDFText_FindClose(search);
231
232 // Exact search unaffected by case sensitiity and whole word flags.
Tom Sepez0aa35312016-01-06 10:16:32 -0800233 search = FPDFText_FindStart(textpage, world.get(),
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700234 FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
thestig4997b222016-06-07 10:46:22 -0700235 EXPECT_TRUE(search);
Tom Sepez526f6d52015-01-28 15:49:13 -0800236 EXPECT_TRUE(FPDFText_FindNext(search));
237 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
238 EXPECT_EQ(5, FPDFText_GetSchCount(search));
239 FPDFText_FindClose(search);
240
241 // Default is case-insensitive, so matching agaist caps works.
Tom Sepez0aa35312016-01-06 10:16:32 -0800242 search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0);
thestig4997b222016-06-07 10:46:22 -0700243 EXPECT_TRUE(search);
Tom Sepez526f6d52015-01-28 15:49:13 -0800244 EXPECT_TRUE(FPDFText_FindNext(search));
245 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
246 EXPECT_EQ(5, FPDFText_GetSchCount(search));
247 FPDFText_FindClose(search);
248
249 // But can be made case sensitive, in which case this fails.
Tom Sepez0aa35312016-01-06 10:16:32 -0800250 search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0);
Tom Sepez526f6d52015-01-28 15:49:13 -0800251 EXPECT_FALSE(FPDFText_FindNext(search));
252 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
253 EXPECT_EQ(0, FPDFText_GetSchCount(search));
254 FPDFText_FindClose(search);
255
256 // Default is match anywhere within word, so matching substirng works.
Tom Sepez0aa35312016-01-06 10:16:32 -0800257 search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0);
Tom Sepez526f6d52015-01-28 15:49:13 -0800258 EXPECT_TRUE(FPDFText_FindNext(search));
259 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
260 EXPECT_EQ(4, FPDFText_GetSchCount(search));
261 FPDFText_FindClose(search);
262
263 // But can be made to mach word boundaries, in which case this fails.
Tom Sepez0aa35312016-01-06 10:16:32 -0800264 search =
265 FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0);
Tom Sepez526f6d52015-01-28 15:49:13 -0800266 EXPECT_FALSE(FPDFText_FindNext(search));
267 // TODO(tsepez): investigate strange index/count values in this state.
268 FPDFText_FindClose(search);
269
270 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700271 UnloadPage(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800272}
273
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800274// Test that the page has characters despite a bad stream length.
275TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) {
Wei Li091f7a02015-11-09 12:09:55 -0800276 EXPECT_TRUE(OpenDocument("bug_57.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -0800277 FPDF_PAGE page = LoadPage(0);
thestig4997b222016-06-07 10:46:22 -0700278 EXPECT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800279
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800280 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
thestig4997b222016-06-07 10:46:22 -0700281 EXPECT_TRUE(textpage);
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800282 EXPECT_EQ(13, FPDFText_CountChars(textpage));
Tom Sepez526f6d52015-01-28 15:49:13 -0800283
284 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700285 UnloadPage(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800286}
287
288TEST_F(FPDFTextEmbeddertest, WebLinks) {
Wei Li091f7a02015-11-09 12:09:55 -0800289 EXPECT_TRUE(OpenDocument("weblinks.pdf"));
Tom Sepezda8189e2015-01-30 14:41:50 -0800290 FPDF_PAGE page = LoadPage(0);
thestig4997b222016-06-07 10:46:22 -0700291 EXPECT_TRUE(page);
Tom Sepez526f6d52015-01-28 15:49:13 -0800292
293 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
thestig4997b222016-06-07 10:46:22 -0700294 EXPECT_TRUE(textpage);
Tom Sepez526f6d52015-01-28 15:49:13 -0800295
296 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
thestig4997b222016-06-07 10:46:22 -0700297 EXPECT_TRUE(pagelink);
Tom Sepez526f6d52015-01-28 15:49:13 -0800298
299 // Page contains two HTTP-style URLs.
300 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
301
Oliver Chang35e68a52015-12-09 12:44:33 -0800302 // Only a terminating NUL required for bogus links.
303 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0));
304 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0));
305 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800306
307 // Query the number of characters required for each link (incl NUL).
Oliver Chang35e68a52015-12-09 12:44:33 -0800308 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
309 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
Tom Sepez526f6d52015-01-28 15:49:13 -0800310
Lei Zhangd27acae2015-05-15 15:36:02 -0700311 static const char expected_url[] = "http://example.com?q=foo";
Wei Li05d53f02016-03-29 16:42:53 -0700312 static const size_t expected_len = sizeof(expected_url);
Tom Sepez526f6d52015-01-28 15:49:13 -0800313 unsigned short fixed_buffer[128];
314
315 // Retrieve a link with too small a buffer. Buffer will not be
316 // NUL-terminated, but must not be modified past indicated length,
317 // so pre-fill with a pattern to check write bounds.
318 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
319 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1));
320 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1));
321 EXPECT_EQ(0xbdbd, fixed_buffer[1]);
322
323 // Check buffer that doesn't have space for a terminating NUL.
324 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
Wei Li05d53f02016-03-29 16:42:53 -0700325 EXPECT_EQ(static_cast<int>(expected_len - 1),
326 FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len - 1));
327 EXPECT_TRUE(
328 check_unsigned_shorts(expected_url, fixed_buffer, expected_len - 1));
329 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len - 1]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800330
331 // Retreive link with exactly-sized buffer.
332 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
Wei Li05d53f02016-03-29 16:42:53 -0700333 EXPECT_EQ(static_cast<int>(expected_len),
334 FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len));
335 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len));
336 EXPECT_EQ(0u, fixed_buffer[expected_len - 1]);
337 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800338
339 // Retreive link with ample-sized-buffer.
340 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
Wei Li05d53f02016-03-29 16:42:53 -0700341 EXPECT_EQ(static_cast<int>(expected_len),
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700342 FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128));
Wei Li05d53f02016-03-29 16:42:53 -0700343 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len));
344 EXPECT_EQ(0u, fixed_buffer[expected_len - 1]);
345 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]);
Tom Sepez526f6d52015-01-28 15:49:13 -0800346
347 // Each link rendered in a single rect in this test page.
348 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
349 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));
350
351 // Each link rendered in a single rect in this test page.
352 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
353 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
354 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));
355
356 // Check boundary of valid link index with valid rect index.
357 double left = 0.0;
358 double right = 0.0;
359 double top = 0.0;
360 double bottom = 0.0;
361 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom);
362 EXPECT_NEAR(50.791, left, 0.001);
363 EXPECT_NEAR(187.963, right, 0.001);
364 EXPECT_NEAR(97.624, bottom, 0.001);
365 EXPECT_NEAR(108.736, top, 0.001);
366
367 // Check that valid link with invalid rect index leaves parameters unchanged.
368 left = -1.0;
369 right = -1.0;
370 top = -1.0;
371 bottom = -1.0;
372 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom);
373 EXPECT_EQ(-1.0, left);
374 EXPECT_EQ(-1.0, right);
375 EXPECT_EQ(-1.0, bottom);
376 EXPECT_EQ(-1.0, top);
377
378 // Check that invalid link index leaves parameters unchanged.
379 left = -2.0;
380 right = -2.0;
381 top = -2.0;
382 bottom = -2.0;
383 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom);
384 EXPECT_EQ(-2.0, left);
385 EXPECT_EQ(-2.0, right);
386 EXPECT_EQ(-2.0, bottom);
387 EXPECT_EQ(-2.0, top);
388
389 FPDFLink_CloseWebLinks(pagelink);
390 FPDFText_ClosePage(textpage);
Lei Zhangd27acae2015-05-15 15:36:02 -0700391 UnloadPage(page);
Tom Sepez26b8a5b2015-01-27 12:42:36 -0800392}
Lei Zhang0f2ea022016-01-11 12:01:23 -0800393
Wei Li76309072017-03-16 17:31:03 -0700394TEST_F(FPDFTextEmbeddertest, WebLinksAcrossLines) {
395 EXPECT_TRUE(OpenDocument("weblinks_across_lines.pdf"));
396 FPDF_PAGE page = LoadPage(0);
397 EXPECT_TRUE(page);
398
399 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
400 EXPECT_TRUE(textpage);
401
402 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
403 EXPECT_TRUE(pagelink);
404
405 static const char* const kExpectedUrls[] = {
Wei Li6c8ed642017-05-19 22:17:38 -0700406 "http://example.com", // from "http://www.example.com?\r\nfoo"
Wei Li76309072017-03-16 17:31:03 -0700407 "http://example.com/", // from "http://www.example.com/\r\nfoo"
408 "http://example.com/test-foo", // from "http://example.com/test-\r\nfoo"
409 "http://abc.com/test-foo", // from "http://abc.com/test-\r\n\r\nfoo"
410 // Next two links from "http://www.example.com/\r\nhttp://www.abc.com/"
411 "http://example.com/", "http://www.abc.com",
412 };
413 static const int kNumLinks = static_cast<int>(FX_ArraySize(kExpectedUrls));
414
415 EXPECT_EQ(kNumLinks, FPDFLink_CountWebLinks(pagelink));
416
417 unsigned short fixed_buffer[128];
418 for (int i = 0; i < kNumLinks; i++) {
419 const size_t expected_len = strlen(kExpectedUrls[i]) + 1;
420 memset(fixed_buffer, 0, FX_ArraySize(fixed_buffer));
421 EXPECT_EQ(static_cast<int>(expected_len),
422 FPDFLink_GetURL(pagelink, i, nullptr, 0));
423 EXPECT_EQ(
424 static_cast<int>(expected_len),
425 FPDFLink_GetURL(pagelink, i, fixed_buffer, FX_ArraySize(fixed_buffer)));
426 EXPECT_TRUE(
427 check_unsigned_shorts(kExpectedUrls[i], fixed_buffer, expected_len));
428 }
429
430 FPDFLink_CloseWebLinks(pagelink);
431 FPDFText_ClosePage(textpage);
432 UnloadPage(page);
433}
434
435TEST_F(FPDFTextEmbeddertest, WebLinksAcrossLinesBug) {
436 EXPECT_TRUE(OpenDocument("bug_650.pdf"));
437 FPDF_PAGE page = LoadPage(0);
438 EXPECT_TRUE(page);
439
440 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
441 EXPECT_TRUE(textpage);
442
443 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
444 EXPECT_TRUE(pagelink);
445
446 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
447 unsigned short fixed_buffer[128] = {0};
448 static const char kExpectedUrl[] =
449 "http://tutorial45.com/learn-autocad-basics-day-166/";
450 static const int kUrlSize = static_cast<int>(sizeof(kExpectedUrl));
451
452 EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
453 EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, fixed_buffer,
454 FX_ArraySize(fixed_buffer)));
455 EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, fixed_buffer, kUrlSize));
456
457 FPDFLink_CloseWebLinks(pagelink);
458 FPDFText_ClosePage(textpage);
459 UnloadPage(page);
460}
461
Lei Zhang0f2ea022016-01-11 12:01:23 -0800462TEST_F(FPDFTextEmbeddertest, GetFontSize) {
463 EXPECT_TRUE(OpenDocument("hello_world.pdf"));
464 FPDF_PAGE page = LoadPage(0);
thestig4997b222016-06-07 10:46:22 -0700465 EXPECT_TRUE(page);
Lei Zhang0f2ea022016-01-11 12:01:23 -0800466
467 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
thestig4997b222016-06-07 10:46:22 -0700468 EXPECT_TRUE(textpage);
Lei Zhang0f2ea022016-01-11 12:01:23 -0800469
470 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
471 12, 12, 12, 1, 1, 16, 16, 16, 16, 16,
472 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
473
474 int count = FPDFText_CountChars(textpage);
Wei Li05d53f02016-03-29 16:42:53 -0700475 ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), static_cast<size_t>(count));
Lei Zhang0f2ea022016-01-11 12:01:23 -0800476 for (int i = 0; i < count; ++i)
477 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i;
478
479 FPDFText_ClosePage(textpage);
480 UnloadPage(page);
481}
npm84be3a32016-09-15 13:27:21 -0700482
483TEST_F(FPDFTextEmbeddertest, ToUnicode) {
484 EXPECT_TRUE(OpenDocument("bug_583.pdf"));
485 FPDF_PAGE page = LoadPage(0);
486 EXPECT_TRUE(page);
487
488 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
489 EXPECT_TRUE(textpage);
490
491 ASSERT_EQ(1, FPDFText_CountChars(textpage));
492 EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0));
493
494 FPDFText_ClosePage(textpage);
495 UnloadPage(page);
496}
Lei Zhang65f31622017-10-24 08:36:44 -0700497
498TEST_F(FPDFTextEmbeddertest, Bug_921) {
499 EXPECT_TRUE(OpenDocument("bug_921.pdf"));
500 FPDF_PAGE page = LoadPage(0);
501 EXPECT_TRUE(page);
502
503 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
504 EXPECT_TRUE(textpage);
505
506 static constexpr unsigned int kData[] = {
507 1095, 1077, 1083, 1086, 1074, 1077, 1095, 1077, 1089, 1082, 1086, 1077,
508 32, 1089, 1090, 1088, 1072, 1076, 1072, 1085, 1080, 1077, 46, 32};
509 static constexpr int kStartIndex = 238;
510
511 ASSERT_EQ(268, FPDFText_CountChars(textpage));
512 for (size_t i = 0; i < FX_ArraySize(kData); ++i)
513 EXPECT_EQ(kData[i], FPDFText_GetUnicode(textpage, kStartIndex + i));
514
515 unsigned short buffer[FX_ArraySize(kData) + 1];
516 memset(buffer, 0xbd, sizeof(buffer));
517 int count =
518 FPDFText_GetText(textpage, kStartIndex, FX_ArraySize(buffer), buffer);
519 ASSERT_GT(count, 0);
520 ASSERT_EQ(FX_ArraySize(kData) + 1, static_cast<size_t>(count));
521 for (size_t i = 0; i < FX_ArraySize(kData); ++i)
522 EXPECT_EQ(kData[i], buffer[i]);
523 EXPECT_EQ(0, buffer[FX_ArraySize(kData)]);
524
525 FPDFText_ClosePage(textpage);
526 UnloadPage(page);
527}