blob: 3e64d8e2c40393fb5d74448c1b159d7d9fd4288e [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

Lei Zhangb4e7f302015-11-06 15:52:32 -08007#include "public/fpdf_text.h"
8
Dan Sinclair13ee55a2016-03-14 15:56:00 -04009#include "core/fpdftext/include/ipdf_linkextract.h"
10#include "core/fpdftext/include/ipdf_textpage.h"
11#include "core/fpdftext/include/ipdf_textpagefind.h"
Lei Zhanga688a042015-11-09 13:57:49 -080012#include "core/include/fpdfdoc/fpdf_doc.h"
Tom Sepez1b246282015-11-25 15:15:31 -080013#include "fpdfsdk/include/fsdk_define.h"
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070014
Tom Sepez40e9ff32015-11-30 12:39:54 -080015#ifdef PDF_ENABLE_XFA
Lei Zhang875b9c92016-01-08 13:51:10 -080016#include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
17#include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
Tom Sepez40e9ff32015-11-30 12:39:54 -080018#endif // PDF_ENABLE_XFA
19
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070020#ifdef _WIN32
21#include <tchar.h>
22#endif
23
Nico Weber9d8ec5a2015-08-04 13:00:21 -070024DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
Tom Sepez1b246282015-11-25 15:15:31 -080025 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
26 if (!pPDFPage)
27 return nullptr;
Tom Sepez40e9ff32015-11-30 12:39:54 -080028#ifdef PDF_ENABLE_XFA
Nico Weber9d8ec5a2015-08-04 13:00:21 -070029 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
Nico Weber9d8ec5a2015-08-04 13:00:21 -070030 CPDFXFA_Document* pDoc = pPage->GetDocument();
31 CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
Tom Sepez40e9ff32015-11-30 12:39:54 -080032#else // PDF_ENABLE_XFA
33 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
34#endif // PDF_ENABLE_XFA
Tom Sepez1b246282015-11-25 15:15:31 -080035 IPDF_TextPage* textpage =
36 IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
Nico Weber9d8ec5a2015-08-04 13:00:21 -070037 textpage->ParseTextPage();
Nico Weber9d8ec5a2015-08-04 13:00:21 -070038 return textpage;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070039}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070040DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
41 delete (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070042}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070043DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
44 if (!text_page)
45 return -1;
46 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
47 return textpage->CountChars();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070048}
Lei Zhang0f2ea022016-01-11 12:01:23 -080049
Nico Weber9d8ec5a2015-08-04 13:00:21 -070050DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
51 int index) {
52 if (!text_page)
53 return -1;
54 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070055
Nico Weber9d8ec5a2015-08-04 13:00:21 -070056 if (index < 0 || index >= textpage->CountChars())
57 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070058
Nico Weber9d8ec5a2015-08-04 13:00:21 -070059 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080060 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070061 return charinfo.m_Unicode;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070062}
Lei Zhang0f2ea022016-01-11 12:01:23 -080063
Nico Weber9d8ec5a2015-08-04 13:00:21 -070064DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
65 int index) {
66 if (!text_page)
67 return 0;
68 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070069
Nico Weber9d8ec5a2015-08-04 13:00:21 -070070 if (index < 0 || index >= textpage->CountChars())
71 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070072
Nico Weber9d8ec5a2015-08-04 13:00:21 -070073 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080074 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070075 return charinfo.m_FontSize;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070076}
77
Nico Weber9d8ec5a2015-08-04 13:00:21 -070078DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
79 int index,
80 double* left,
81 double* right,
82 double* bottom,
83 double* top) {
84 if (!text_page)
85 return;
86 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -070087
Nico Weber9d8ec5a2015-08-04 13:00:21 -070088 if (index < 0 || index >= textpage->CountChars())
89 return;
90 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080091 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070092 *left = charinfo.m_CharBox.left;
93 *right = charinfo.m_CharBox.right;
94 *bottom = charinfo.m_CharBox.bottom;
95 *top = charinfo.m_CharBox.top;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070096}
97
Nico Weber9d8ec5a2015-08-04 13:00:21 -070098// select
99DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
100 double x,
101 double y,
Lei Zhang38a5a392015-08-13 17:52:16 -0700102 double xTolerance,
103 double yTolerance) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700104 if (!text_page)
105 return -3;
106 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhang38a5a392015-08-13 17:52:16 -0700107 return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
108 (FX_FLOAT)yTolerance);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700109}
110
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700111DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
112 int start,
113 int count,
114 unsigned short* result) {
115 if (!text_page)
116 return 0;
117 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -0700118
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700119 if (start >= textpage->CountChars())
120 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700121
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700122 CFX_WideString str = textpage->GetPageText(start, count);
123 if (str.GetLength() > count)
124 str = str.Left(count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700125
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700126 CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
127 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
128 cbUTF16str.GetLength());
129 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700130
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700131 return cbUTF16str.GetLength() / sizeof(unsigned short);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700132}
133
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700134DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
135 int start,
136 int count) {
137 if (!text_page)
138 return 0;
139 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
140 return textpage->CountRects(start, count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700141}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700142DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
143 int rect_index,
144 double* left,
145 double* top,
146 double* right,
147 double* bottom) {
148 if (!text_page)
149 return;
150 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
151 CFX_FloatRect rect;
152 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
153 *left = rect.left;
154 *top = rect.top;
155 *right = rect.right;
156 *bottom = rect.bottom;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700157}
158
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700159DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
160 double left,
161 double top,
162 double right,
163 double bottom,
164 unsigned short* buffer,
165 int buflen) {
166 if (!text_page)
167 return 0;
168 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
169 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
170 (FX_FLOAT)top);
171 CFX_WideString str = textpage->GetTextByRect(rect);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700172
Lei Zhang412e9082015-12-14 18:34:00 -0800173 if (buflen <= 0 || !buffer) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700174 return str.GetLength();
175 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700176
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700177 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
178 int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
179 int size = buflen > len ? len : buflen;
180 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
181 size * sizeof(unsigned short));
182 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700183
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700184 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700185}
186
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700187// Search
Dan Sinclair50cce602016-02-24 09:51:16 -0500188// -1 for end
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700189DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
190 FPDF_WIDESTRING findwhat,
191 unsigned long flags,
192 int start_index) {
193 if (!text_page)
194 return NULL;
195 IPDF_TextPageFind* textpageFind = NULL;
196 textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
197 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
198 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
199 start_index);
200 return textpageFind;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700201}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700202DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
203 if (!handle)
204 return FALSE;
205 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
206 return textpageFind->FindNext();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700207}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700208DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
209 if (!handle)
210 return FALSE;
211 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
212 return textpageFind->FindPrev();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700213}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700214DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
215 if (!handle)
216 return 0;
217 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
218 return textpageFind->GetCurOrder();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700219}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700220DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
221 if (!handle)
222 return 0;
223 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
224 return textpageFind->GetMatchedCount();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700225}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700226DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
227 if (!handle)
228 return;
229 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
230 delete textpageFind;
231 handle = NULL;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700232}
233
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700234// web link
235DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
236 if (!text_page)
237 return NULL;
238 IPDF_LinkExtract* pageLink = NULL;
239 pageLink = IPDF_LinkExtract::CreateLinkExtract();
240 pageLink->ExtractLinks((IPDF_TextPage*)text_page);
241 return pageLink;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700242}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700243DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
244 if (!link_page)
245 return 0;
246 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
247 return pageLink->CountLinks();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700248}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700249DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
250 int link_index,
251 unsigned short* buffer,
252 int buflen) {
253 if (!link_page)
254 return 0;
255 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
256 CFX_WideString url = pageLink->GetURL(link_index);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700257
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700258 CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
259 int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
Lei Zhang412e9082015-12-14 18:34:00 -0800260 if (!buffer || buflen <= 0)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700261 return len;
262 int size = len < buflen ? len : buflen;
263 if (size > 0) {
264 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
265 size * sizeof(unsigned short));
266 cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
267 }
268 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700269}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700270DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
271 int link_index) {
272 if (!link_page)
273 return 0;
274 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
275 CFX_RectArray rectArray;
276 pageLink->GetRects(link_index, rectArray);
277 return rectArray.GetSize();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700278}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700279DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
280 int link_index,
281 int rect_index,
282 double* left,
283 double* top,
284 double* right,
285 double* bottom) {
286 if (!link_page)
287 return;
288 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
289 CFX_RectArray rectArray;
290 pageLink->GetRects(link_index, rectArray);
291 if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
292 CFX_FloatRect rect = rectArray.GetAt(rect_index);
293 *left = rect.left;
294 *right = rect.right;
295 *top = rect.top;
296 *bottom = rect.bottom;
297 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700298}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700299DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
300 delete (IPDF_LinkExtract*)link_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700301}