// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

Lei Zhang | b4e7f30 | 2015-11-06 15:52:32 -0800 | [diff] [blame] | 7 | #include "public/fpdf_text.h" |
| 8 | |
Lei Zhang | e5b0bd1 | 2015-06-19 17:15:41 -0700 | [diff] [blame] | 9 | #include "../../core/include/fpdfdoc/fpdf_doc.h" |
| 10 | #include "../../core/include/fpdftext/fpdf_text.h" |
Bo Xu | fdc00a7 | 2014-10-28 23:03:33 -0700 | [diff] [blame] | 11 | #include "../include/fpdfxfa/fpdfxfa_doc.h" |
| 12 | #include "../include/fpdfxfa/fpdfxfa_page.h" |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 13 | |
| 14 | #ifdef _WIN32 |
| 15 | #include <tchar.h> |
| 16 | #endif |
| 17 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 18 | DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) { |
| 19 | if (!page) |
| 20 | return NULL; |
| 21 | IPDF_TextPage* textpage = NULL; |
| 22 | CPDFXFA_Page* pPage = (CPDFXFA_Page*)page; |
| 23 | if (!pPage->GetPDFPage()) |
| 24 | return NULL; |
| 25 | CPDFXFA_Document* pDoc = pPage->GetDocument(); |
| 26 | CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc()); |
| 27 | textpage = IPDF_TextPage::CreateTextPage((CPDF_Page*)pPage->GetPDFPage(), |
| 28 | viewRef.IsDirectionR2L()); |
| 29 | textpage->ParseTextPage(); |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 30 | return textpage; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 31 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 32 | DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) { |
| 33 | delete (IPDF_TextPage*)text_page; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 34 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 35 | DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) { |
| 36 | if (!text_page) |
| 37 | return -1; |
| 38 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
| 39 | return textpage->CountChars(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 40 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 41 | DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, |
| 42 | int index) { |
| 43 | if (!text_page) |
| 44 | return -1; |
| 45 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 46 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 47 | if (index < 0 || index >= textpage->CountChars()) |
| 48 | return 0; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 49 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 50 | FPDF_CHAR_INFO charinfo; |
| 51 | textpage->GetCharInfo(index, charinfo); |
| 52 | return charinfo.m_Unicode; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 53 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 54 | DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, |
| 55 | int index) { |
| 56 | if (!text_page) |
| 57 | return 0; |
| 58 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 59 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 60 | if (index < 0 || index >= textpage->CountChars()) |
| 61 | return 0; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 62 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 63 | FPDF_CHAR_INFO charinfo; |
| 64 | textpage->GetCharInfo(index, charinfo); |
| 65 | return charinfo.m_FontSize; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 66 | } |
| 67 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 68 | DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, |
| 69 | int index, |
| 70 | double* left, |
| 71 | double* right, |
| 72 | double* bottom, |
| 73 | double* top) { |
| 74 | if (!text_page) |
| 75 | return; |
| 76 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
Lei Zhang | a6d9f0e | 2015-06-13 00:48:38 -0700 | [diff] [blame] | 77 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 78 | if (index < 0 || index >= textpage->CountChars()) |
| 79 | return; |
| 80 | FPDF_CHAR_INFO charinfo; |
| 81 | textpage->GetCharInfo(index, charinfo); |
| 82 | *left = charinfo.m_CharBox.left; |
| 83 | *right = charinfo.m_CharBox.right; |
| 84 | *bottom = charinfo.m_CharBox.bottom; |
| 85 | *top = charinfo.m_CharBox.top; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 86 | } |
| 87 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 88 | // select |
| 89 | DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, |
| 90 | double x, |
| 91 | double y, |
Lei Zhang | 38a5a39 | 2015-08-13 17:52:16 -0700 | [diff] [blame] | 92 | double xTolerance, |
| 93 | double yTolerance) { |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 94 | if (!text_page) |
| 95 | return -3; |
| 96 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
Lei Zhang | 38a5a39 | 2015-08-13 17:52:16 -0700 | [diff] [blame] | 97 | return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance, |
| 98 | (FX_FLOAT)yTolerance); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 99 | } |
| 100 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 101 | DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, |
| 102 | int start, |
| 103 | int count, |
| 104 | unsigned short* result) { |
| 105 | if (!text_page) |
| 106 | return 0; |
| 107 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
Lei Zhang | a6d9f0e | 2015-06-13 00:48:38 -0700 | [diff] [blame] | 108 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 109 | if (start >= textpage->CountChars()) |
| 110 | return 0; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 111 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 112 | CFX_WideString str = textpage->GetPageText(start, count); |
| 113 | if (str.GetLength() > count) |
| 114 | str = str.Left(count); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 115 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 116 | CFX_ByteString cbUTF16str = str.UTF16LE_Encode(); |
| 117 | FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), |
| 118 | cbUTF16str.GetLength()); |
| 119 | cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength()); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 120 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 121 | return cbUTF16str.GetLength() / sizeof(unsigned short); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 122 | } |
| 123 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 124 | DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, |
| 125 | int start, |
| 126 | int count) { |
| 127 | if (!text_page) |
| 128 | return 0; |
| 129 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
| 130 | return textpage->CountRects(start, count); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 131 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 132 | DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, |
| 133 | int rect_index, |
| 134 | double* left, |
| 135 | double* top, |
| 136 | double* right, |
| 137 | double* bottom) { |
| 138 | if (!text_page) |
| 139 | return; |
| 140 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
| 141 | CFX_FloatRect rect; |
| 142 | textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom); |
| 143 | *left = rect.left; |
| 144 | *top = rect.top; |
| 145 | *right = rect.right; |
| 146 | *bottom = rect.bottom; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 147 | } |
| 148 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 149 | DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, |
| 150 | double left, |
| 151 | double top, |
| 152 | double right, |
| 153 | double bottom, |
| 154 | unsigned short* buffer, |
| 155 | int buflen) { |
| 156 | if (!text_page) |
| 157 | return 0; |
| 158 | IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; |
| 159 | CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right, |
| 160 | (FX_FLOAT)top); |
| 161 | CFX_WideString str = textpage->GetTextByRect(rect); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 162 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 163 | if (buflen <= 0 || buffer == NULL) { |
| 164 | return str.GetLength(); |
| 165 | } |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 166 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 167 | CFX_ByteString cbUTF16Str = str.UTF16LE_Encode(); |
| 168 | int len = cbUTF16Str.GetLength() / sizeof(unsigned short); |
| 169 | int size = buflen > len ? len : buflen; |
| 170 | FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)), |
| 171 | size * sizeof(unsigned short)); |
| 172 | cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short)); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 173 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 174 | return size; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 175 | } |
| 176 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 177 | // Search |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 178 | //-1 for end |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 179 | DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, |
| 180 | FPDF_WIDESTRING findwhat, |
| 181 | unsigned long flags, |
| 182 | int start_index) { |
| 183 | if (!text_page) |
| 184 | return NULL; |
| 185 | IPDF_TextPageFind* textpageFind = NULL; |
| 186 | textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page); |
| 187 | FX_STRSIZE len = CFX_WideString::WStringLength(findwhat); |
| 188 | textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags, |
| 189 | start_index); |
| 190 | return textpageFind; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 191 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 192 | DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) { |
| 193 | if (!handle) |
| 194 | return FALSE; |
| 195 | IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; |
| 196 | return textpageFind->FindNext(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 197 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 198 | DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) { |
| 199 | if (!handle) |
| 200 | return FALSE; |
| 201 | IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; |
| 202 | return textpageFind->FindPrev(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 203 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 204 | DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) { |
| 205 | if (!handle) |
| 206 | return 0; |
| 207 | IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; |
| 208 | return textpageFind->GetCurOrder(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 209 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 210 | DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) { |
| 211 | if (!handle) |
| 212 | return 0; |
| 213 | IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; |
| 214 | return textpageFind->GetMatchedCount(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 215 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 216 | DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) { |
| 217 | if (!handle) |
| 218 | return; |
| 219 | IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; |
| 220 | delete textpageFind; |
| 221 | handle = NULL; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 222 | } |
| 223 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 224 | // web link |
| 225 | DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) { |
| 226 | if (!text_page) |
| 227 | return NULL; |
| 228 | IPDF_LinkExtract* pageLink = NULL; |
| 229 | pageLink = IPDF_LinkExtract::CreateLinkExtract(); |
| 230 | pageLink->ExtractLinks((IPDF_TextPage*)text_page); |
| 231 | return pageLink; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 232 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 233 | DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) { |
| 234 | if (!link_page) |
| 235 | return 0; |
| 236 | IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; |
| 237 | return pageLink->CountLinks(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 238 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 239 | DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, |
| 240 | int link_index, |
| 241 | unsigned short* buffer, |
| 242 | int buflen) { |
| 243 | if (!link_page) |
| 244 | return 0; |
| 245 | IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; |
| 246 | CFX_WideString url = pageLink->GetURL(link_index); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 247 | |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 248 | CFX_ByteString cbUTF16URL = url.UTF16LE_Encode(); |
| 249 | int len = cbUTF16URL.GetLength() / sizeof(unsigned short); |
| 250 | if (buffer == NULL || buflen <= 0) |
| 251 | return len; |
| 252 | int size = len < buflen ? len : buflen; |
| 253 | if (size > 0) { |
| 254 | FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)), |
| 255 | size * sizeof(unsigned short)); |
| 256 | cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short)); |
| 257 | } |
| 258 | return size; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 259 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 260 | DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, |
| 261 | int link_index) { |
| 262 | if (!link_page) |
| 263 | return 0; |
| 264 | IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; |
| 265 | CFX_RectArray rectArray; |
| 266 | pageLink->GetRects(link_index, rectArray); |
| 267 | return rectArray.GetSize(); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 268 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 269 | DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, |
| 270 | int link_index, |
| 271 | int rect_index, |
| 272 | double* left, |
| 273 | double* top, |
| 274 | double* right, |
| 275 | double* bottom) { |
| 276 | if (!link_page) |
| 277 | return; |
| 278 | IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; |
| 279 | CFX_RectArray rectArray; |
| 280 | pageLink->GetRects(link_index, rectArray); |
| 281 | if (rect_index >= 0 && rect_index < rectArray.GetSize()) { |
| 282 | CFX_FloatRect rect = rectArray.GetAt(rect_index); |
| 283 | *left = rect.left; |
| 284 | *right = rect.right; |
| 285 | *top = rect.top; |
| 286 | *bottom = rect.bottom; |
| 287 | } |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 288 | } |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 289 | DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) { |
| 290 | delete (IPDF_LinkExtract*)link_page; |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 291 | } |