John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 1 | // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 4 | |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 5 | // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 7 | #ifndef PUBLIC_FPDF_TEXT_H_ |
| 8 | #define PUBLIC_FPDF_TEXT_H_ |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 9 | |
Dan Sinclair | 85c8e7f | 2016-11-21 13:50:32 -0500 | [diff] [blame] | 10 | // NOLINTNEXTLINE(build/include) |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 11 | #include "fpdfview.h" |
| 12 | |
| 13 | // Exported Functions |
| 14 | #ifdef __cplusplus |
| 15 | extern "C" { |
| 16 | #endif |
| 17 | |
| 18 | // Function: FPDFText_LoadPage |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 19 | // Prepare information about all characters in a page. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 20 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 21 | // page - Handle to the page. Returned by FPDF_LoadPage function |
| 22 | // (in FPDFVIEW module). |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 23 | // Return value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 24 | // A handle to the text page information structure. |
| 25 | // NULL if something goes wrong. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 26 | // Comments: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 27 | // Application must call FPDFText_ClosePage to release the text page |
| 28 | // information. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 29 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 30 | FPDF_EXPORT FPDF_TEXTPAGE FPDF_CALLCONV FPDFText_LoadPage(FPDF_PAGE page); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 31 | |
| 32 | // Function: FPDFText_ClosePage |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 33 | // Release all resources allocated for a text page information |
| 34 | // structure. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 35 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 36 | // text_page - Handle to a text page information structure. |
| 37 | // Returned by FPDFText_LoadPage function. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 38 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 39 | // None. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 40 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 41 | FPDF_EXPORT void FPDF_CALLCONV FPDFText_ClosePage(FPDF_TEXTPAGE text_page); |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 42 | |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 43 | // Function: FPDFText_CountChars |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 44 | // Get number of characters in a page. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 45 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 46 | // text_page - Handle to a text page information structure. |
| 47 | // Returned by FPDFText_LoadPage function. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 48 | // Return value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 49 | // Number of characters in the page. Return -1 for error. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 50 | // Generated characters, like additional space characters, new line |
| 51 | // characters, are also counted. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 52 | // Comments: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 53 | // Characters in a page form a "stream"; inside the stream, each |
| 54 | // character has an index. |
| 55 | // Many FPDFTEXT functions take this index as a parameter. The |
| 56 | // first character in the page |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 57 | // has an index value of zero. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 58 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 59 | FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountChars(FPDF_TEXTPAGE text_page); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 60 | |
| 61 | // Function: FPDFText_GetUnicode |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 62 | // Get Unicode of a character in a page. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 63 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 64 | // text_page - Handle to a text page information structure. |
| 65 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 66 | // index - Zero-based index of the character. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 67 | // Return value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 68 | // The Unicode of the particular character. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 69 | // If a character is not encoded in Unicode and the Foxit engine can't |
| 70 | // convert it to Unicode, |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 71 | // the return value will be zero. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 72 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 73 | FPDF_EXPORT unsigned int FPDF_CALLCONV |
| 74 | FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 75 | |
| 76 | // Function: FPDFText_GetFontSize |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 77 | // Get the font size of a particular character. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 78 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 79 | // text_page - Handle to a text page information structure. |
| 80 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 81 | // index - Zero-based index of the character. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 82 | // Return value: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 83 | // The font size of the particular character, measured in points (about |
| 84 | // 1/72 inch). |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 85 | // This is the typographic size of the font (so called "em size"). |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 86 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 87 | FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, |
| 88 | int index); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 89 | |
| 90 | // Function: FPDFText_GetCharBox |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 91 | // Get bounding box of a particular character. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 92 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 93 | // text_page - Handle to a text page information structure. |
| 94 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 95 | // index - Zero-based index of the character. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 96 | // left - Pointer to a double number receiving left position |
| 97 | // of the character box. |
| 98 | // right - Pointer to a double number receiving right position |
| 99 | // of the character box. |
| 100 | // bottom - Pointer to a double number receiving bottom position |
| 101 | // of the character box. |
| 102 | // top - Pointer to a double number receiving top position of |
| 103 | // the character box. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 104 | // Return Value: |
Lei Zhang | 241752f | 2018-01-11 14:34:06 +0000 | [diff] [blame] | 105 | // On success, return TRUE and fill in |left|, |right|, |bottom|, and |
| 106 | // |top|. If |text_page| is invalid, or if |index| is out of bounds, |
| 107 | // then return FALSE, and the out parameters remain unmodified. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 108 | // Comments: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 109 | // All positions are measured in PDF "user space". |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 110 | // |
Lei Zhang | 241752f | 2018-01-11 14:34:06 +0000 | [diff] [blame] | 111 | FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, |
| 112 | int index, |
| 113 | double* left, |
| 114 | double* right, |
| 115 | double* bottom, |
| 116 | double* top); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 117 | |
Andrew Weintraub | d300234 | 2017-08-11 11:36:51 -0400 | [diff] [blame] | 118 | // Function: FPDFText_GetCharOrigin |
| 119 | // Get origin of a particular character. |
| 120 | // Parameters: |
| 121 | // text_page - Handle to a text page information structure. |
| 122 | // Returned by FPDFText_LoadPage function. |
| 123 | // index - Zero-based index of the character. |
| 124 | // x - Pointer to a double number receiving x coordinate of |
| 125 | // the character origin. |
| 126 | // y - Pointer to a double number receiving y coordinate of |
| 127 | // the character origin. |
| 128 | // Return Value: |
| 129 | // Whether the call succeeded. If false, x and y are unchanged. |
| 130 | // Comments: |
| 131 | // All positions are measured in PDF "user space". |
| 132 | // |
| 133 | FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV |
| 134 | FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page, |
| 135 | int index, |
| 136 | double* x, |
| 137 | double* y); |
| 138 | |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 139 | // Function: FPDFText_GetCharIndexAtPos |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 140 | // Get the index of a character at or nearby a certain position on the |
| 141 | // page. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 142 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 143 | // text_page - Handle to a text page information structure. |
| 144 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 145 | // x - X position in PDF "user space". |
| 146 | // y - Y position in PDF "user space". |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 147 | // xTolerance - An x-axis tolerance value for character hit |
| 148 | // detection, in points. |
| 149 | // yTolerance - A y-axis tolerance value for character hit |
| 150 | // detection, in points. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 151 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 152 | // The zero-based index of the character at, or nearby the point (x,y). |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 153 | // If there is no character at or nearby the point, return value will |
| 154 | // be -1. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 155 | // If an error occurs, -3 will be returned. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 156 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 157 | FPDF_EXPORT int FPDF_CALLCONV |
| 158 | FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, |
| 159 | double x, |
| 160 | double y, |
| 161 | double xTolerance, |
| 162 | double yTolerance); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 163 | |
| 164 | // Function: FPDFText_GetText |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 165 | // Extract unicode text string from the page. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 166 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 167 | // text_page - Handle to a text page information structure. |
| 168 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 169 | // start_index - Index of the start character. |
| 170 | // count - Number of characters to be extracted. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 171 | // result - A buffer (allocated by application) receiving the |
| 172 | // extracted unicodes. |
| 173 | // The size of the buffer must be able to hold the |
| 174 | // number of characters plus a terminator. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 175 | // Return Value: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 176 | // Number of characters written into the result buffer, including the |
| 177 | // trailing terminator. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 178 | // Comments: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 179 | // This function ignores characters without unicode information. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 180 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 181 | FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page, |
| 182 | int start_index, |
| 183 | int count, |
| 184 | unsigned short* result); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 185 | |
| 186 | // Function: FPDFText_CountRects |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 187 | // Count number of rectangular areas occupied by a segment of texts. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 188 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 189 | // text_page - Handle to a text page information structure. |
| 190 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 191 | // start_index - Index of the start character. |
| 192 | // count - Number of characters. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 193 | // Return value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 194 | // Number of rectangles. Zero for error. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 195 | // Comments: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 196 | // This function, along with FPDFText_GetRect, can be used by |
| 197 | // applications to detect the position |
| 198 | // on the page for a text segment, so the proper areas can be |
| 199 | // highlighted, for example. |
| 200 | // FPDFTEXT will automatically merge small character boxes into a bigger |
| 201 | // one if those characters |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 202 | // are on the same line and use the same font settings. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 203 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 204 | FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page, |
| 205 | int start_index, |
| 206 | int count); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 207 | |
| 208 | // Function: FPDFText_GetRect |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 209 | // Get a rectangular area from the result generated by |
| 210 | // FPDFText_CountRects. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 211 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 212 | // text_page - Handle to a text page information structure. |
| 213 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 214 | // rect_index - Zero-based index for the rectangle. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 215 | // left - Pointer to a double value receiving the rectangle |
| 216 | // left boundary. |
| 217 | // top - Pointer to a double value receiving the rectangle |
| 218 | // top boundary. |
| 219 | // right - Pointer to a double value receiving the rectangle |
| 220 | // right boundary. |
| 221 | // bottom - Pointer to a double value receiving the rectangle |
| 222 | // bottom boundary. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 223 | // Return Value: |
Lei Zhang | 7625021 | 2018-01-11 14:28:01 +0000 | [diff] [blame] | 224 | // On success, return TRUE and fill in |left|, |top|, |right|, and |
| 225 | // |bottom|. If |text_page| is invalid then return FALSE, and the out |
| 226 | // parameters remain unmodified. If |text_page| is valid but |
| 227 | // |rect_index| is out of bounds, then return FALSE and set the out |
| 228 | // parameters to 0. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 229 | // |
Lei Zhang | 7625021 | 2018-01-11 14:28:01 +0000 | [diff] [blame] | 230 | FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetRect(FPDF_TEXTPAGE text_page, |
| 231 | int rect_index, |
| 232 | double* left, |
| 233 | double* top, |
| 234 | double* right, |
| 235 | double* bottom); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 236 | |
| 237 | // Function: FPDFText_GetBoundedText |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 238 | // Extract unicode text within a rectangular boundary on the page. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 239 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 240 | // text_page - Handle to a text page information structure. |
| 241 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 242 | // left - Left boundary. |
| 243 | // top - Top boundary. |
| 244 | // right - Right boundary. |
| 245 | // bottom - Bottom boundary. |
| 246 | // buffer - A unicode buffer. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 247 | // buflen - Number of characters (not bytes) for the buffer, |
| 248 | // excluding an additional terminator. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 249 | // Return Value: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 250 | // If buffer is NULL or buflen is zero, return number of characters |
| 251 | // (not bytes) of text present within |
| 252 | // the rectangle, excluding a terminating NUL. Generally you should |
| 253 | // pass a buffer at least one larger |
| 254 | // than this if you want a terminating NUL, which will be provided if |
| 255 | // space is available. |
| 256 | // Otherwise, return number of characters copied into the buffer, |
| 257 | // including the terminating NUL |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 258 | // when space for it is available. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 259 | // Comments: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 260 | // If the buffer is too small, as much text as will fit is copied into |
| 261 | // it. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 262 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 263 | FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, |
| 264 | double left, |
| 265 | double top, |
| 266 | double right, |
| 267 | double bottom, |
| 268 | unsigned short* buffer, |
| 269 | int buflen); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 270 | |
| 271 | // Flags used by FPDFText_FindStart function. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 272 | #define FPDF_MATCHCASE \ |
| 273 | 0x00000001 // If not set, it will not match case by default. |
| 274 | #define FPDF_MATCHWHOLEWORD \ |
| 275 | 0x00000002 // If not set, it will not match the whole word by default. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 276 | |
| 277 | // Function: FPDFText_FindStart |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 278 | // Start a search. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 279 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 280 | // text_page - Handle to a text page information structure. |
| 281 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 282 | // findwhat - A unicode match pattern. |
| 283 | // flags - Option flags. |
| 284 | // start_index - Start from this character. -1 for end of the page. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 285 | // Return Value: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 286 | // A handle for the search context. FPDFText_FindClose must be called |
| 287 | // to release this handle. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 288 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 289 | FPDF_EXPORT FPDF_SCHHANDLE FPDF_CALLCONV |
| 290 | FPDFText_FindStart(FPDF_TEXTPAGE text_page, |
| 291 | FPDF_WIDESTRING findwhat, |
| 292 | unsigned long flags, |
| 293 | int start_index); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 294 | |
| 295 | // Function: FPDFText_FindNext |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 296 | // Search in the direction from page start to end. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 297 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 298 | // handle - A search context handle returned by |
| 299 | // FPDFText_FindStart. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 300 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 301 | // Whether a match is found. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 302 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 303 | FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindNext(FPDF_SCHHANDLE handle); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 304 | |
| 305 | // Function: FPDFText_FindPrev |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 306 | // Search in the direction from page end to start. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 307 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 308 | // handle - A search context handle returned by |
| 309 | // FPDFText_FindStart. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 310 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 311 | // Whether a match is found. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 312 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 313 | FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindPrev(FPDF_SCHHANDLE handle); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 314 | |
| 315 | // Function: FPDFText_GetSchResultIndex |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 316 | // Get the starting character index of the search result. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 317 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 318 | // handle - A search context handle returned by |
| 319 | // FPDFText_FindStart. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 320 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 321 | // Index for the starting character. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 322 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 323 | FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 324 | |
| 325 | // Function: FPDFText_GetSchCount |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 326 | // Get the number of matched characters in the search result. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 327 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 328 | // handle - A search context handle returned by |
| 329 | // FPDFText_FindStart. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 330 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 331 | // Number of matched characters. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 332 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 333 | FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchCount(FPDF_SCHHANDLE handle); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 334 | |
| 335 | // Function: FPDFText_FindClose |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 336 | // Release a search context. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 337 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 338 | // handle - A search context handle returned by |
| 339 | // FPDFText_FindStart. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 340 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 341 | // None. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 342 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 343 | FPDF_EXPORT void FPDF_CALLCONV FPDFText_FindClose(FPDF_SCHHANDLE handle); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 344 | |
| 345 | // Function: FPDFLink_LoadWebLinks |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 346 | // Prepare information about weblinks in a page. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 347 | // Parameters: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 348 | // text_page - Handle to a text page information structure. |
| 349 | // Returned by FPDFText_LoadPage function. |
Tom Sepez | 526f6d5 | 2015-01-28 15:49:13 -0800 | [diff] [blame] | 350 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 351 | // A handle to the page's links information structure. |
| 352 | // NULL if something goes wrong. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 353 | // Comments: |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 354 | // Weblinks are those links implicitly embedded in PDF pages. PDF also |
| 355 | // has a type of |
| 356 | // annotation called "link"; FPDFTEXT doesn't deal with that kind of |
| 357 | // link. |
| 358 | // FPDFTEXT weblink feature is useful for automatically detecting links |
| 359 | // in the page |
| 360 | // contents. For example, things like "http://www.foxitsoftware.com" |
| 361 | // will be detected, |
| 362 | // so applications can allow the user to click on those characters to |
| 363 | // activate the link, |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 364 | // even if the PDF doesn't come with link annotations. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 365 | // |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 366 | // FPDFLink_CloseWebLinks must be called to release resources. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 367 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 368 | FPDF_EXPORT FPDF_PAGELINK FPDF_CALLCONV |
| 369 | FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 370 | |
| 371 | // Function: FPDFLink_CountWebLinks |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 372 | // Count number of detected web links. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 373 | // Parameters: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 374 | // link_page - Handle returned by FPDFLink_LoadWebLinks. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 375 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 376 | // Number of detected web links. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 377 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 378 | FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 379 | |
| 380 | // Function: FPDFLink_GetURL |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 381 | // Fetch the URL of a detected web link. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 382 | // Parameters: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 383 | // link_page - Handle returned by FPDFLink_LoadWebLinks. |
| 384 | // link_index - Zero-based index of the link. |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 385 | // buffer - A unicode buffer (array of unsigned short) receiving the result. May be NULL to query the required size. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 386 | // buflen - Capacity of |buffer| in characters (not bytes), |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 387 | // including room for an additional terminator. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 388 | // Return Value: |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 389 | // If |buffer| is NULL or |buflen| is zero, return the number of |
| 390 | // characters (not bytes) needed to hold the result (the additional |
| 391 | // terminator is included in this count). |
| 392 | // Otherwise, copy the result into |buffer|, truncating at |buflen| if |
| 393 | // the result is too large to fit, and return the number of characters |
| 394 | // actually copied into the buffer (the additional terminator is also |
| 395 | // included in this count). NOTE(review): whether a truncated result is still terminated is not stated here - confirm. |
| 396 | // If |link_index| does not correspond to a valid link, then the result |
| 397 | // is an empty string. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 398 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 399 | FPDF_EXPORT int FPDF_CALLCONV FPDFLink_GetURL(FPDF_PAGELINK link_page, |
| 400 | int link_index, |
| 401 | unsigned short* buffer, |
| 402 | int buflen); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 403 | |
| 404 | // Function: FPDFLink_CountRects |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 405 | // Count the number of rectangular areas for the link. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 406 | // Parameters: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 407 | // link_page - Handle returned by FPDFLink_LoadWebLinks. |
| 408 | // link_index - Zero-based index of the link. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 409 | // Return Value: |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 410 | // The number of rectangular areas for the link. If |link_index| does |
| 411 | // not correspond to a valid link, then 0 is returned. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 412 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 413 | FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountRects(FPDF_PAGELINK link_page, |
| 414 | int link_index); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 415 | |
| 416 | // Function: FPDFLink_GetRect |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 417 | // Fetch the boundaries of one rectangle of a link. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 418 | // Parameters: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 419 | // link_page - Handle returned by FPDFLink_LoadWebLinks. |
| 420 | // link_index - Zero-based index of the link. |
| 421 | // rect_index - Zero-based index of the rectangle within the link. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 422 | // left - Pointer to a double value receiving the rectangle's |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 423 | // left boundary. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 424 | // top - Pointer to a double value receiving the rectangle's |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 425 | // top boundary. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 426 | // right - Pointer to a double value receiving the rectangle's |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 427 | // right boundary. |
Nico Weber | 9d8ec5a | 2015-08-04 13:00:21 -0700 | [diff] [blame] | 428 | // bottom - Pointer to a double value receiving the rectangle's |
tsepez | 6914118 | 2016-04-21 10:43:39 -0700 | [diff] [blame] | 429 | // bottom boundary. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 430 | // Return Value: |
Lei Zhang | 50bd8fa | 2018-01-10 17:33:06 +0000 | [diff] [blame] | 431 | // On success, return TRUE and fill in |left|, |top|, |right|, and |
Lei Zhang | 7625021 | 2018-01-11 14:28:01 +0000 | [diff] [blame] | 432 | // |bottom|. If |link_page| is invalid or if |link_index| does not |
| 433 | // correspond to a valid link, then return FALSE, and the out |
| 434 | // parameters remain unmodified. NOTE(review): the result for an out-of-range |rect_index| is not stated here - confirm against the implementation. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 435 | // |
Lei Zhang | 50bd8fa | 2018-01-10 17:33:06 +0000 | [diff] [blame] | 436 | FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFLink_GetRect(FPDF_PAGELINK link_page, |
| 437 | int link_index, |
| 438 | int rect_index, |
| 439 | double* left, |
| 440 | double* top, |
| 441 | double* right, |
| 442 | double* bottom); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 443 | |
| 444 | // Function: FPDFLink_CloseWebLinks |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 445 | // Release resources used by the weblink feature. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 446 | // Parameters: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 447 | // link_page - Handle returned by FPDFLink_LoadWebLinks. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 448 | // Return Value: |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 449 | // None. |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 450 | // |
Dan Sinclair | 00d2ad1 | 2017-08-10 14:13:02 -0400 | [diff] [blame] | 451 | FPDF_EXPORT void FPDF_CALLCONV FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 452 | |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 453 | #ifdef __cplusplus |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 454 | } |
John Abd-El-Malek | 3f3b45c | 2014-05-23 17:28:10 -0700 | [diff] [blame] | 455 | #endif |
| 456 | |
Tom Sepez | 9857e20 | 2015-05-13 17:09:26 -0700 | [diff] [blame] | 457 | #endif // PUBLIC_FPDF_TEXT_H_ |