blob: 80d56920bc7018c269e0099c38a238ca852ed904 [file] [log] [blame]
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -07001// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
Lei Zhanga6d9f0e2015-06-13 00:48:38 -07004
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -07005// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
Lei Zhangb4e7f302015-11-06 15:52:32 -08007#include "public/fpdf_text.h"
8
Lei Zhanga688a042015-11-09 13:57:49 -08009#include "core/include/fpdfdoc/fpdf_doc.h"
10#include "core/include/fpdftext/fpdf_text.h"
Tom Sepez1b246282015-11-25 15:15:31 -080011#include "fpdfsdk/include/fsdk_define.h"
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070012
Tom Sepez40e9ff32015-11-30 12:39:54 -080013#ifdef PDF_ENABLE_XFA
Lei Zhang875b9c92016-01-08 13:51:10 -080014#include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
15#include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
Tom Sepez40e9ff32015-11-30 12:39:54 -080016#endif // PDF_ENABLE_XFA
17
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070018#ifdef _WIN32
19#include <tchar.h>
20#endif
21
Nico Weber9d8ec5a2015-08-04 13:00:21 -070022DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
Tom Sepez1b246282015-11-25 15:15:31 -080023 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
24 if (!pPDFPage)
25 return nullptr;
Tom Sepez40e9ff32015-11-30 12:39:54 -080026#ifdef PDF_ENABLE_XFA
Nico Weber9d8ec5a2015-08-04 13:00:21 -070027 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
Nico Weber9d8ec5a2015-08-04 13:00:21 -070028 CPDFXFA_Document* pDoc = pPage->GetDocument();
29 CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
Tom Sepez40e9ff32015-11-30 12:39:54 -080030#else // PDF_ENABLE_XFA
31 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
32#endif // PDF_ENABLE_XFA
Tom Sepez1b246282015-11-25 15:15:31 -080033 IPDF_TextPage* textpage =
34 IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
Nico Weber9d8ec5a2015-08-04 13:00:21 -070035 textpage->ParseTextPage();
Nico Weber9d8ec5a2015-08-04 13:00:21 -070036 return textpage;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070037}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070038DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
39 delete (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070040}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070041DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
42 if (!text_page)
43 return -1;
44 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
45 return textpage->CountChars();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070046}
Lei Zhang0f2ea022016-01-11 12:01:23 -080047
Nico Weber9d8ec5a2015-08-04 13:00:21 -070048DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
49 int index) {
50 if (!text_page)
51 return -1;
52 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070053
Nico Weber9d8ec5a2015-08-04 13:00:21 -070054 if (index < 0 || index >= textpage->CountChars())
55 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070056
Nico Weber9d8ec5a2015-08-04 13:00:21 -070057 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080058 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070059 return charinfo.m_Unicode;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070060}
Lei Zhang0f2ea022016-01-11 12:01:23 -080061
Nico Weber9d8ec5a2015-08-04 13:00:21 -070062DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
63 int index) {
64 if (!text_page)
65 return 0;
66 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070067
Nico Weber9d8ec5a2015-08-04 13:00:21 -070068 if (index < 0 || index >= textpage->CountChars())
69 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070070
Nico Weber9d8ec5a2015-08-04 13:00:21 -070071 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080072 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070073 return charinfo.m_FontSize;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070074}
75
Nico Weber9d8ec5a2015-08-04 13:00:21 -070076DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
77 int index,
78 double* left,
79 double* right,
80 double* bottom,
81 double* top) {
82 if (!text_page)
83 return;
84 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -070085
Nico Weber9d8ec5a2015-08-04 13:00:21 -070086 if (index < 0 || index >= textpage->CountChars())
87 return;
88 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080089 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070090 *left = charinfo.m_CharBox.left;
91 *right = charinfo.m_CharBox.right;
92 *bottom = charinfo.m_CharBox.bottom;
93 *top = charinfo.m_CharBox.top;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070094}
95
Nico Weber9d8ec5a2015-08-04 13:00:21 -070096// select
97DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
98 double x,
99 double y,
Lei Zhang38a5a392015-08-13 17:52:16 -0700100 double xTolerance,
101 double yTolerance) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700102 if (!text_page)
103 return -3;
104 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhang38a5a392015-08-13 17:52:16 -0700105 return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
106 (FX_FLOAT)yTolerance);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700107}
108
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700109DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
110 int start,
111 int count,
112 unsigned short* result) {
113 if (!text_page)
114 return 0;
115 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -0700116
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700117 if (start >= textpage->CountChars())
118 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700119
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700120 CFX_WideString str = textpage->GetPageText(start, count);
121 if (str.GetLength() > count)
122 str = str.Left(count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700123
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700124 CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
125 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
126 cbUTF16str.GetLength());
127 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700128
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700129 return cbUTF16str.GetLength() / sizeof(unsigned short);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700130}
131
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700132DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
133 int start,
134 int count) {
135 if (!text_page)
136 return 0;
137 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
138 return textpage->CountRects(start, count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700139}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700140DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
141 int rect_index,
142 double* left,
143 double* top,
144 double* right,
145 double* bottom) {
146 if (!text_page)
147 return;
148 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
149 CFX_FloatRect rect;
150 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
151 *left = rect.left;
152 *top = rect.top;
153 *right = rect.right;
154 *bottom = rect.bottom;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700155}
156
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700157DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
158 double left,
159 double top,
160 double right,
161 double bottom,
162 unsigned short* buffer,
163 int buflen) {
164 if (!text_page)
165 return 0;
166 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
167 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
168 (FX_FLOAT)top);
169 CFX_WideString str = textpage->GetTextByRect(rect);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700170
Lei Zhang412e9082015-12-14 18:34:00 -0800171 if (buflen <= 0 || !buffer) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700172 return str.GetLength();
173 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700174
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700175 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
176 int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
177 int size = buflen > len ? len : buflen;
178 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
179 size * sizeof(unsigned short));
180 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700181
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700182 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700183}
184
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700185// Search
Dan Sinclair50cce602016-02-24 09:51:16 -0500186// -1 for end
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700187DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
188 FPDF_WIDESTRING findwhat,
189 unsigned long flags,
190 int start_index) {
191 if (!text_page)
192 return NULL;
193 IPDF_TextPageFind* textpageFind = NULL;
194 textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
195 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
196 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
197 start_index);
198 return textpageFind;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700199}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700200DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
201 if (!handle)
202 return FALSE;
203 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
204 return textpageFind->FindNext();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700205}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700206DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
207 if (!handle)
208 return FALSE;
209 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
210 return textpageFind->FindPrev();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700211}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700212DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
213 if (!handle)
214 return 0;
215 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
216 return textpageFind->GetCurOrder();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700217}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700218DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
219 if (!handle)
220 return 0;
221 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
222 return textpageFind->GetMatchedCount();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700223}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700224DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
225 if (!handle)
226 return;
227 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
228 delete textpageFind;
229 handle = NULL;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700230}
231
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700232// web link
233DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
234 if (!text_page)
235 return NULL;
236 IPDF_LinkExtract* pageLink = NULL;
237 pageLink = IPDF_LinkExtract::CreateLinkExtract();
238 pageLink->ExtractLinks((IPDF_TextPage*)text_page);
239 return pageLink;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700240}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700241DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
242 if (!link_page)
243 return 0;
244 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
245 return pageLink->CountLinks();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700246}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700247DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
248 int link_index,
249 unsigned short* buffer,
250 int buflen) {
251 if (!link_page)
252 return 0;
253 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
254 CFX_WideString url = pageLink->GetURL(link_index);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700255
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700256 CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
257 int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
Lei Zhang412e9082015-12-14 18:34:00 -0800258 if (!buffer || buflen <= 0)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700259 return len;
260 int size = len < buflen ? len : buflen;
261 if (size > 0) {
262 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
263 size * sizeof(unsigned short));
264 cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
265 }
266 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700267}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700268DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
269 int link_index) {
270 if (!link_page)
271 return 0;
272 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
273 CFX_RectArray rectArray;
274 pageLink->GetRects(link_index, rectArray);
275 return rectArray.GetSize();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700276}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700277DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
278 int link_index,
279 int rect_index,
280 double* left,
281 double* top,
282 double* right,
283 double* bottom) {
284 if (!link_page)
285 return;
286 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
287 CFX_RectArray rectArray;
288 pageLink->GetRects(link_index, rectArray);
289 if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
290 CFX_FloatRect rect = rectArray.GetAt(rect_index);
291 *left = rect.left;
292 *right = rect.right;
293 *top = rect.top;
294 *bottom = rect.bottom;
295 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700296}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700297DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
298 delete (IPDF_LinkExtract*)link_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700299}