blob: 333ef3b9a129d8709ce69db63ecc12f31460069a [file] [log] [blame]
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
Lei Zhangb4e7f302015-11-06 15:52:32 -08007#include "public/fpdf_text.h"
8
Lei Zhange5b0bd12015-06-19 17:15:41 -07009#include "../../core/include/fpdfdoc/fpdf_doc.h"
10#include "../../core/include/fpdftext/fpdf_text.h"
Bo Xufdc00a72014-10-28 23:03:33 -070011#include "../include/fpdfxfa/fpdfxfa_doc.h"
12#include "../include/fpdfxfa/fpdfxfa_page.h"
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070013
14#ifdef _WIN32
15#include <tchar.h>
16#endif
17
Nico Weber9d8ec5a2015-08-04 13:00:21 -070018DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
19 if (!page)
20 return NULL;
21 IPDF_TextPage* textpage = NULL;
22 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
23 if (!pPage->GetPDFPage())
24 return NULL;
25 CPDFXFA_Document* pDoc = pPage->GetDocument();
26 CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
27 textpage = IPDF_TextPage::CreateTextPage((CPDF_Page*)pPage->GetPDFPage(),
28 viewRef.IsDirectionR2L());
29 textpage->ParseTextPage();
Nico Weber9d8ec5a2015-08-04 13:00:21 -070030 return textpage;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070031}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070032DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
33 delete (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070034}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070035DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
36 if (!text_page)
37 return -1;
38 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
39 return textpage->CountChars();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070040}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070041DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
42 int index) {
43 if (!text_page)
44 return -1;
45 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070046
Nico Weber9d8ec5a2015-08-04 13:00:21 -070047 if (index < 0 || index >= textpage->CountChars())
48 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070049
Nico Weber9d8ec5a2015-08-04 13:00:21 -070050 FPDF_CHAR_INFO charinfo;
51 textpage->GetCharInfo(index, charinfo);
52 return charinfo.m_Unicode;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070053}
Nico Weber9d8ec5a2015-08-04 13:00:21 -070054DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
55 int index) {
56 if (!text_page)
57 return 0;
58 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070059
Nico Weber9d8ec5a2015-08-04 13:00:21 -070060 if (index < 0 || index >= textpage->CountChars())
61 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070062
Nico Weber9d8ec5a2015-08-04 13:00:21 -070063 FPDF_CHAR_INFO charinfo;
64 textpage->GetCharInfo(index, charinfo);
65 return charinfo.m_FontSize;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070066}
67
Nico Weber9d8ec5a2015-08-04 13:00:21 -070068DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
69 int index,
70 double* left,
71 double* right,
72 double* bottom,
73 double* top) {
74 if (!text_page)
75 return;
76 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -070077
Nico Weber9d8ec5a2015-08-04 13:00:21 -070078 if (index < 0 || index >= textpage->CountChars())
79 return;
80 FPDF_CHAR_INFO charinfo;
81 textpage->GetCharInfo(index, charinfo);
82 *left = charinfo.m_CharBox.left;
83 *right = charinfo.m_CharBox.right;
84 *bottom = charinfo.m_CharBox.bottom;
85 *top = charinfo.m_CharBox.top;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070086}
87
// Select
89DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
90 double x,
91 double y,
Lei Zhang38a5a392015-08-13 17:52:16 -070092 double xTolerance,
93 double yTolerance) {
Nico Weber9d8ec5a2015-08-04 13:00:21 -070094 if (!text_page)
95 return -3;
96 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhang38a5a392015-08-13 17:52:16 -070097 return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
98 (FX_FLOAT)yTolerance);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -070099}
100
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700101DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
102 int start,
103 int count,
104 unsigned short* result) {
105 if (!text_page)
106 return 0;
107 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
Lei Zhanga6d9f0e2015-06-13 00:48:38 -0700108
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700109 if (start >= textpage->CountChars())
110 return 0;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700111
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700112 CFX_WideString str = textpage->GetPageText(start, count);
113 if (str.GetLength() > count)
114 str = str.Left(count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700115
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700116 CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
117 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
118 cbUTF16str.GetLength());
119 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700120
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700121 return cbUTF16str.GetLength() / sizeof(unsigned short);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700122}
123
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700124DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
125 int start,
126 int count) {
127 if (!text_page)
128 return 0;
129 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
130 return textpage->CountRects(start, count);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700131}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700132DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
133 int rect_index,
134 double* left,
135 double* top,
136 double* right,
137 double* bottom) {
138 if (!text_page)
139 return;
140 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
141 CFX_FloatRect rect;
142 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
143 *left = rect.left;
144 *top = rect.top;
145 *right = rect.right;
146 *bottom = rect.bottom;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700147}
148
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700149DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
150 double left,
151 double top,
152 double right,
153 double bottom,
154 unsigned short* buffer,
155 int buflen) {
156 if (!text_page)
157 return 0;
158 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
159 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
160 (FX_FLOAT)top);
161 CFX_WideString str = textpage->GetTextByRect(rect);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700162
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700163 if (buflen <= 0 || buffer == NULL) {
164 return str.GetLength();
165 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700166
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700167 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
168 int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
169 int size = buflen > len ? len : buflen;
170 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
171 size * sizeof(unsigned short));
172 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700173
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700174 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700175}
176
// Search
// -1 for end
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700179DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
180 FPDF_WIDESTRING findwhat,
181 unsigned long flags,
182 int start_index) {
183 if (!text_page)
184 return NULL;
185 IPDF_TextPageFind* textpageFind = NULL;
186 textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
187 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
188 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
189 start_index);
190 return textpageFind;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700191}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700192DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
193 if (!handle)
194 return FALSE;
195 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
196 return textpageFind->FindNext();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700197}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700198DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
199 if (!handle)
200 return FALSE;
201 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
202 return textpageFind->FindPrev();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700203}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700204DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
205 if (!handle)
206 return 0;
207 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
208 return textpageFind->GetCurOrder();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700209}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700210DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
211 if (!handle)
212 return 0;
213 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
214 return textpageFind->GetMatchedCount();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700215}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700216DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
217 if (!handle)
218 return;
219 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
220 delete textpageFind;
221 handle = NULL;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700222}
223
// Web link
225DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
226 if (!text_page)
227 return NULL;
228 IPDF_LinkExtract* pageLink = NULL;
229 pageLink = IPDF_LinkExtract::CreateLinkExtract();
230 pageLink->ExtractLinks((IPDF_TextPage*)text_page);
231 return pageLink;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700232}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700233DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
234 if (!link_page)
235 return 0;
236 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
237 return pageLink->CountLinks();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700238}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700239DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
240 int link_index,
241 unsigned short* buffer,
242 int buflen) {
243 if (!link_page)
244 return 0;
245 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
246 CFX_WideString url = pageLink->GetURL(link_index);
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700247
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700248 CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
249 int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
250 if (buffer == NULL || buflen <= 0)
251 return len;
252 int size = len < buflen ? len : buflen;
253 if (size > 0) {
254 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
255 size * sizeof(unsigned short));
256 cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
257 }
258 return size;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700259}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700260DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
261 int link_index) {
262 if (!link_page)
263 return 0;
264 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
265 CFX_RectArray rectArray;
266 pageLink->GetRects(link_index, rectArray);
267 return rectArray.GetSize();
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700268}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700269DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
270 int link_index,
271 int rect_index,
272 double* left,
273 double* top,
274 double* right,
275 double* bottom) {
276 if (!link_page)
277 return;
278 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
279 CFX_RectArray rectArray;
280 pageLink->GetRects(link_index, rectArray);
281 if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
282 CFX_FloatRect rect = rectArray.GetAt(rect_index);
283 *left = rect.left;
284 *right = rect.right;
285 *top = rect.top;
286 *bottom = rect.bottom;
287 }
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700288}
Nico Weber9d8ec5a2015-08-04 13:00:21 -0700289DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
290 delete (IPDF_LinkExtract*)link_page;
John Abd-El-Malek3f3b45c2014-05-23 17:28:10 -0700291}