blob: 24b592a88beea1ef8d4d5c8ca7969019c070f9b2 [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

Lei Zhangb4e7f302015-11-06 15:52:32 -08007#include "public/fpdf_text.h"
8
Dan Sinclair455a4192016-03-16 09:48:56 -04009#include "core/fpdfapi/fpdf_page/include/cpdf_page.h"
Dan Sinclair13ee55a2016-03-14 15:56:00 -040010#include "core/fpdftext/include/ipdf_linkextract.h"
11#include "core/fpdftext/include/ipdf_textpage.h"
12#include "core/fpdftext/include/ipdf_textpagefind.h"
Lei Zhanga688a042015-11-09 13:57:49 -080013#include "core/include/fpdfdoc/fpdf_doc.h"
Tom Sepez1b246282015-11-25 15:15:31 -080014#include "fpdfsdk/include/fsdk_define.h"
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070015
Tom Sepez40e9ff32015-11-30 12:39:54 -080016#ifdef PDF_ENABLE_XFA
Lei Zhang875b9c92016-01-08 13:51:10 -080017#include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
18#include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
Tom Sepez40e9ff32015-11-30 12:39:54 -080019#endif // PDF_ENABLE_XFA
20
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070021#ifdef _WIN32
22#include <tchar.h>
23#endif
24
Nico Weber9d8ec5a2015-08-04 13:00:21 -070025DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
Tom Sepez1b246282015-11-25 15:15:31 -080026 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
27 if (!pPDFPage)
28 return nullptr;
Tom Sepez40e9ff32015-11-30 12:39:54 -080029#ifdef PDF_ENABLE_XFA
Nico Weber9d8ec5a2015-08-04 13:00:21 -070030 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
Nico Weber9d8ec5a2015-08-04 13:00:21 -070031 CPDFXFA_Document* pDoc = pPage->GetDocument();
32 CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
Tom Sepez40e9ff32015-11-30 12:39:54 -080033#else // PDF_ENABLE_XFA
34 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
35#endif // PDF_ENABLE_XFA
Tom Sepez1b246282015-11-25 15:15:31 -080036 IPDF_TextPage* textpage =
37 IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
Nico Weber9d8ec5a2015-08-04 13:00:21 -070038 textpage->ParseTextPage();
Nico Weber9d8ec5a2015-08-04 13:00:21 -070039 return textpage;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070040}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070041DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
42 delete (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070043}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070044DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
45 if (!text_page)
46 return -1;
47 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
48 return textpage->CountChars();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070049}
Lei Zhang0f2ea022016-01-11 12:01:23 -080050
Nico Weber9d8ec5a2015-08-04 13:00:21 -070051DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
52 int index) {
53 if (!text_page)
Wei Lid4e8f122016-03-21 11:20:44 -070054 return 0;
Nico Weber9d8ec5a2015-08-04 13:00:21 -070055 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070056
Nico Weber9d8ec5a2015-08-04 13:00:21 -070057 if (index < 0 || index >= textpage->CountChars())
58 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070059
Nico Weber9d8ec5a2015-08-04 13:00:21 -070060 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080061 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070062 return charinfo.m_Unicode;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070063}
Lei Zhang0f2ea022016-01-11 12:01:23 -080064
Nico Weber9d8ec5a2015-08-04 13:00:21 -070065DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
66 int index) {
67 if (!text_page)
68 return 0;
69 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070070
Nico Weber9d8ec5a2015-08-04 13:00:21 -070071 if (index < 0 || index >= textpage->CountChars())
72 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070073
Nico Weber9d8ec5a2015-08-04 13:00:21 -070074 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080075 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070076 return charinfo.m_FontSize;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070077}
78
Nico Weber9d8ec5a2015-08-04 13:00:21 -070079DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
80 int index,
81 double* left,
82 double* right,
83 double* bottom,
84 double* top) {
85 if (!text_page)
86 return;
87 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -070088
Nico Weber9d8ec5a2015-08-04 13:00:21 -070089 if (index < 0 || index >= textpage->CountChars())
90 return;
91 FPDF_CHAR_INFO charinfo;
Lei Zhang0f2ea022016-01-11 12:01:23 -080092 textpage->GetCharInfo(index, &charinfo);
Nico Weber9d8ec5a2015-08-04 13:00:21 -070093 *left = charinfo.m_CharBox.left;
94 *right = charinfo.m_CharBox.right;
95 *bottom = charinfo.m_CharBox.bottom;
96 *top = charinfo.m_CharBox.top;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070097}
98
// Selection: map a position on the page to a character index.
100DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
101 double x,
102 double y,
Lei Zhang38a5a392015-08-13 17:52:16 -0700103 double xTolerance,
104 double yTolerance) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700105 if (!text_page)
106 return -3;
107 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhang38a5a392015-08-13 17:52:16 -0700108 return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
109 (FX_FLOAT)yTolerance);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700110}
111
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700112DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
113 int start,
114 int count,
115 unsigned short* result) {
116 if (!text_page)
117 return 0;
118 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -0700119
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700120 if (start >= textpage->CountChars())
121 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700122
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700123 CFX_WideString str = textpage->GetPageText(start, count);
124 if (str.GetLength() > count)
125 str = str.Left(count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700126
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700127 CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
128 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
129 cbUTF16str.GetLength());
130 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700131
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700132 return cbUTF16str.GetLength() / sizeof(unsigned short);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700133}
134
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700135DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
136 int start,
137 int count) {
138 if (!text_page)
139 return 0;
140 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
141 return textpage->CountRects(start, count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700142}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700143DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
144 int rect_index,
145 double* left,
146 double* top,
147 double* right,
148 double* bottom) {
149 if (!text_page)
150 return;
151 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
152 CFX_FloatRect rect;
153 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
154 *left = rect.left;
155 *top = rect.top;
156 *right = rect.right;
157 *bottom = rect.bottom;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700158}
159
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700160DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
161 double left,
162 double top,
163 double right,
164 double bottom,
165 unsigned short* buffer,
166 int buflen) {
167 if (!text_page)
168 return 0;
169 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
170 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
171 (FX_FLOAT)top);
172 CFX_WideString str = textpage->GetTextByRect(rect);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700173
Lei Zhang412e9082015-12-14 18:34:00 -0800174 if (buflen <= 0 || !buffer) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700175 return str.GetLength();
176 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700177
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700178 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
179 int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
180 int size = buflen > len ? len : buflen;
181 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
182 size * sizeof(unsigned short));
183 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700184
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700185 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700186}
187
// Text search.
// NOTE: -1 stands for "end" (presumably as the start_index value passed to
// FPDFText_FindStart() -- confirm against the public header).
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700190DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
191 FPDF_WIDESTRING findwhat,
192 unsigned long flags,
193 int start_index) {
194 if (!text_page)
195 return NULL;
196 IPDF_TextPageFind* textpageFind = NULL;
197 textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
198 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
199 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
200 start_index);
201 return textpageFind;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700202}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700203DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
204 if (!handle)
205 return FALSE;
206 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
207 return textpageFind->FindNext();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700208}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700209DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
210 if (!handle)
211 return FALSE;
212 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
213 return textpageFind->FindPrev();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700214}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700215DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
216 if (!handle)
217 return 0;
218 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
219 return textpageFind->GetCurOrder();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700220}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700221DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
222 if (!handle)
223 return 0;
224 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
225 return textpageFind->GetMatchedCount();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700226}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700227DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
228 if (!handle)
229 return;
230 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
231 delete textpageFind;
232 handle = NULL;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700233}
234
// Web links.
236DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
237 if (!text_page)
238 return NULL;
239 IPDF_LinkExtract* pageLink = NULL;
240 pageLink = IPDF_LinkExtract::CreateLinkExtract();
241 pageLink->ExtractLinks((IPDF_TextPage*)text_page);
242 return pageLink;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700243}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700244DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
245 if (!link_page)
246 return 0;
247 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
248 return pageLink->CountLinks();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700249}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700250DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
251 int link_index,
252 unsigned short* buffer,
253 int buflen) {
254 if (!link_page)
255 return 0;
256 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
257 CFX_WideString url = pageLink->GetURL(link_index);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700258
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700259 CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
260 int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
Lei Zhang412e9082015-12-14 18:34:00 -0800261 if (!buffer || buflen <= 0)
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700262 return len;
263 int size = len < buflen ? len : buflen;
264 if (size > 0) {
265 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
266 size * sizeof(unsigned short));
267 cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
268 }
269 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700270}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700271DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
272 int link_index) {
273 if (!link_page)
274 return 0;
275 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
276 CFX_RectArray rectArray;
277 pageLink->GetRects(link_index, rectArray);
278 return rectArray.GetSize();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700279}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700280DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
281 int link_index,
282 int rect_index,
283 double* left,
284 double* top,
285 double* right,
286 double* bottom) {
287 if (!link_page)
288 return;
289 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
290 CFX_RectArray rectArray;
291 pageLink->GetRects(link_index, rectArray);
292 if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
293 CFX_FloatRect rect = rectArray.GetAt(rect_index);
294 *left = rect.left;
295 *right = rect.right;
296 *top = rect.top;
297 *bottom = rect.bottom;
298 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700299}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700300DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
301 delete (IPDF_LinkExtract*)link_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700302}