Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1 | /* NOTE: this API is -ONLY- for use with single byte character strings. */ |
| 2 | /* Do not use it with Unicode. */ |
| 3 | |
| 4 | #include "bytes_methods.h" |
| 5 | |
/* The including file selects the mutable or immutable flavour of these
   methods by defining STRINGLIB_MUTABLE first.  Fall back to the immutable
   behaviour (0), but warn, when it forgot to do so. */
#if !defined(STRINGLIB_MUTABLE)
#  warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#  define STRINGLIB_MUTABLE 0
#endif
| 10 | |
/* The more complicated methods.  Parts of these should be pulled out into
   the shared code in bytes_methods.c to cut down on duplicated code. */
| 13 | |
| 14 | PyDoc_STRVAR(expandtabs__doc__, |
| 15 | "B.expandtabs([tabsize]) -> copy of B\n\ |
| 16 | \n\ |
| 17 | Return a copy of B where all tab characters are expanded using spaces.\n\ |
| 18 | If tabsize is not given, a tab size of 8 characters is assumed."); |
| 19 | |
| 20 | static PyObject* |
| 21 | stringlib_expandtabs(PyObject *self, PyObject *args) |
| 22 | { |
| 23 | const char *e, *p; |
| 24 | char *q; |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 25 | size_t i, j; |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 26 | PyObject *u; |
| 27 | int tabsize = 8; |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 28 | |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 29 | if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 30 | return NULL; |
| 31 | |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 32 | /* First pass: determine size of output string */ |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 33 | i = j = 0; |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 34 | e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); |
| 35 | for (p = STRINGLIB_STR(self); p < e; p++) |
| 36 | if (*p == '\t') { |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 37 | if (tabsize > 0) { |
| 38 | j += tabsize - (j % tabsize); |
| 39 | if (j > PY_SSIZE_T_MAX) { |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 40 | PyErr_SetString(PyExc_OverflowError, |
| 41 | "result is too long"); |
| 42 | return NULL; |
| 43 | } |
| 44 | } |
| 45 | } |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 46 | else { |
| 47 | j++; |
| 48 | if (*p == '\n' || *p == '\r') { |
| 49 | i += j; |
| 50 | j = 0; |
| 51 | if (i > PY_SSIZE_T_MAX) { |
| 52 | PyErr_SetString(PyExc_OverflowError, |
| 53 | "result is too long"); |
| 54 | return NULL; |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | if ((i + j) > PY_SSIZE_T_MAX) { |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 60 | PyErr_SetString(PyExc_OverflowError, "result is too long"); |
| 61 | return NULL; |
| 62 | } |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 63 | |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 64 | /* Second pass: create output string and fill it */ |
| 65 | u = STRINGLIB_NEW(NULL, i + j); |
| 66 | if (!u) |
| 67 | return NULL; |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 68 | |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 69 | j = 0; |
| 70 | q = STRINGLIB_STR(u); |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 71 | |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 72 | for (p = STRINGLIB_STR(self); p < e; p++) |
| 73 | if (*p == '\t') { |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 74 | if (tabsize > 0) { |
| 75 | i = tabsize - (j % tabsize); |
| 76 | j += i; |
| 77 | while (i--) |
| 78 | *q++ = ' '; |
| 79 | } |
| 80 | } |
| 81 | else { |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 82 | j++; |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 83 | *q++ = *p; |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 84 | if (*p == '\n' || *p == '\r') |
| 85 | j = 0; |
| 86 | } |
Antoine Pitrou | a1b22ce | 2009-01-13 23:25:47 +0000 | [diff] [blame] | 87 | |
Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 88 | return u; |
| 89 | } |
| 90 | |
| 91 | Py_LOCAL_INLINE(PyObject *) |
| 92 | pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) |
| 93 | { |
| 94 | PyObject *u; |
| 95 | |
| 96 | if (left < 0) |
| 97 | left = 0; |
| 98 | if (right < 0) |
| 99 | right = 0; |
| 100 | |
| 101 | if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { |
| 102 | #if STRINGLIB_MUTABLE |
| 103 | /* We're defined as returning a copy; If the object is mutable |
| 104 | * that means we must make an identical copy. */ |
| 105 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 106 | #else |
| 107 | Py_INCREF(self); |
| 108 | return (PyObject *)self; |
| 109 | #endif /* STRINGLIB_MUTABLE */ |
| 110 | } |
| 111 | |
| 112 | u = STRINGLIB_NEW(NULL, |
| 113 | left + STRINGLIB_LEN(self) + right); |
| 114 | if (u) { |
| 115 | if (left) |
| 116 | memset(STRINGLIB_STR(u), fill, left); |
| 117 | Py_MEMCPY(STRINGLIB_STR(u) + left, |
| 118 | STRINGLIB_STR(self), |
| 119 | STRINGLIB_LEN(self)); |
| 120 | if (right) |
| 121 | memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), |
| 122 | fill, right); |
| 123 | } |
| 124 | |
| 125 | return u; |
| 126 | } |
| 127 | |
| 128 | PyDoc_STRVAR(ljust__doc__, |
| 129 | "B.ljust(width[, fillchar]) -> copy of B\n" |
| 130 | "\n" |
| 131 | "Return B left justified in a string of length width. Padding is\n" |
| 132 | "done using the specified fill character (default is a space)."); |
| 133 | |
| 134 | static PyObject * |
| 135 | stringlib_ljust(PyObject *self, PyObject *args) |
| 136 | { |
| 137 | Py_ssize_t width; |
| 138 | char fillchar = ' '; |
| 139 | |
| 140 | if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) |
| 141 | return NULL; |
| 142 | |
| 143 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
| 144 | #if STRINGLIB_MUTABLE |
| 145 | /* We're defined as returning a copy; If the object is mutable |
| 146 | * that means we must make an identical copy. */ |
| 147 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 148 | #else |
| 149 | Py_INCREF(self); |
| 150 | return (PyObject*) self; |
| 151 | #endif |
| 152 | } |
| 153 | |
| 154 | return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); |
| 155 | } |
| 156 | |
| 157 | |
| 158 | PyDoc_STRVAR(rjust__doc__, |
| 159 | "B.rjust(width[, fillchar]) -> copy of B\n" |
| 160 | "\n" |
| 161 | "Return B right justified in a string of length width. Padding is\n" |
| 162 | "done using the specified fill character (default is a space)"); |
| 163 | |
| 164 | static PyObject * |
| 165 | stringlib_rjust(PyObject *self, PyObject *args) |
| 166 | { |
| 167 | Py_ssize_t width; |
| 168 | char fillchar = ' '; |
| 169 | |
| 170 | if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) |
| 171 | return NULL; |
| 172 | |
| 173 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
| 174 | #if STRINGLIB_MUTABLE |
| 175 | /* We're defined as returning a copy; If the object is mutable |
| 176 | * that means we must make an identical copy. */ |
| 177 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 178 | #else |
| 179 | Py_INCREF(self); |
| 180 | return (PyObject*) self; |
| 181 | #endif |
| 182 | } |
| 183 | |
| 184 | return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); |
| 185 | } |
| 186 | |
| 187 | |
| 188 | PyDoc_STRVAR(center__doc__, |
| 189 | "B.center(width[, fillchar]) -> copy of B\n" |
| 190 | "\n" |
| 191 | "Return B centered in a string of length width. Padding is\n" |
| 192 | "done using the specified fill character (default is a space)."); |
| 193 | |
| 194 | static PyObject * |
| 195 | stringlib_center(PyObject *self, PyObject *args) |
| 196 | { |
| 197 | Py_ssize_t marg, left; |
| 198 | Py_ssize_t width; |
| 199 | char fillchar = ' '; |
| 200 | |
| 201 | if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) |
| 202 | return NULL; |
| 203 | |
| 204 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
| 205 | #if STRINGLIB_MUTABLE |
| 206 | /* We're defined as returning a copy; If the object is mutable |
| 207 | * that means we must make an identical copy. */ |
| 208 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 209 | #else |
| 210 | Py_INCREF(self); |
| 211 | return (PyObject*) self; |
| 212 | #endif |
| 213 | } |
| 214 | |
| 215 | marg = width - STRINGLIB_LEN(self); |
| 216 | left = marg / 2 + (marg & width & 1); |
| 217 | |
| 218 | return pad(self, left, marg - left, fillchar); |
| 219 | } |
| 220 | |
| 221 | PyDoc_STRVAR(zfill__doc__, |
| 222 | "B.zfill(width) -> copy of B\n" |
| 223 | "\n" |
| 224 | "Pad a numeric string B with zeros on the left, to fill a field\n" |
| 225 | "of the specified width. B is never truncated."); |
| 226 | |
| 227 | static PyObject * |
| 228 | stringlib_zfill(PyObject *self, PyObject *args) |
| 229 | { |
| 230 | Py_ssize_t fill; |
| 231 | PyObject *s; |
| 232 | char *p; |
| 233 | Py_ssize_t width; |
| 234 | |
| 235 | if (!PyArg_ParseTuple(args, "n:zfill", &width)) |
| 236 | return NULL; |
| 237 | |
| 238 | if (STRINGLIB_LEN(self) >= width) { |
| 239 | if (STRINGLIB_CHECK_EXACT(self)) { |
| 240 | #if STRINGLIB_MUTABLE |
| 241 | /* We're defined as returning a copy; If the object is mutable |
| 242 | * that means we must make an identical copy. */ |
| 243 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 244 | #else |
| 245 | Py_INCREF(self); |
| 246 | return (PyObject*) self; |
| 247 | #endif |
| 248 | } |
| 249 | else |
| 250 | return STRINGLIB_NEW( |
| 251 | STRINGLIB_STR(self), |
| 252 | STRINGLIB_LEN(self) |
| 253 | ); |
| 254 | } |
| 255 | |
| 256 | fill = width - STRINGLIB_LEN(self); |
| 257 | |
| 258 | s = pad(self, fill, 0, '0'); |
| 259 | |
| 260 | if (s == NULL) |
| 261 | return NULL; |
| 262 | |
| 263 | p = STRINGLIB_STR(s); |
| 264 | if (p[fill] == '+' || p[fill] == '-') { |
| 265 | /* move sign to beginning of string */ |
| 266 | p[0] = p[fill]; |
| 267 | p[fill] = '0'; |
| 268 | } |
| 269 | |
| 270 | return (PyObject*) s; |
| 271 | } |
| 272 | |
| 273 | |
| 274 | #define _STRINGLIB_SPLIT_APPEND(data, left, right) \ |
| 275 | str = STRINGLIB_NEW((data) + (left), \ |
| 276 | (right) - (left)); \ |
| 277 | if (str == NULL) \ |
| 278 | goto onError; \ |
| 279 | if (PyList_Append(list, str)) { \ |
| 280 | Py_DECREF(str); \ |
| 281 | goto onError; \ |
| 282 | } \ |
| 283 | else \ |
| 284 | Py_DECREF(str); |
| 285 | |
| 286 | PyDoc_STRVAR(splitlines__doc__, |
| 287 | "B.splitlines([keepends]) -> list of lines\n\ |
| 288 | \n\ |
| 289 | Return a list of the lines in B, breaking at line boundaries.\n\ |
| 290 | Line breaks are not included in the resulting list unless keepends\n\ |
| 291 | is given and true."); |
| 292 | |
| 293 | static PyObject* |
| 294 | stringlib_splitlines(PyObject *self, PyObject *args) |
| 295 | { |
| 296 | register Py_ssize_t i; |
| 297 | register Py_ssize_t j; |
| 298 | Py_ssize_t len; |
| 299 | int keepends = 0; |
| 300 | PyObject *list; |
| 301 | PyObject *str; |
| 302 | char *data; |
| 303 | |
| 304 | if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) |
| 305 | return NULL; |
| 306 | |
| 307 | data = STRINGLIB_STR(self); |
| 308 | len = STRINGLIB_LEN(self); |
| 309 | |
| 310 | /* This does not use the preallocated list because splitlines is |
| 311 | usually run with hundreds of newlines. The overhead of |
| 312 | switching between PyList_SET_ITEM and append causes about a |
| 313 | 2-3% slowdown for that common case. A smarter implementation |
| 314 | could move the if check out, so the SET_ITEMs are done first |
| 315 | and the appends only done when the prealloc buffer is full. |
| 316 | That's too much work for little gain.*/ |
| 317 | |
| 318 | list = PyList_New(0); |
| 319 | if (!list) |
| 320 | goto onError; |
| 321 | |
| 322 | for (i = j = 0; i < len; ) { |
| 323 | Py_ssize_t eol; |
| 324 | |
| 325 | /* Find a line and append it */ |
| 326 | while (i < len && data[i] != '\n' && data[i] != '\r') |
| 327 | i++; |
| 328 | |
| 329 | /* Skip the line break reading CRLF as one line break */ |
| 330 | eol = i; |
| 331 | if (i < len) { |
| 332 | if (data[i] == '\r' && i + 1 < len && |
| 333 | data[i+1] == '\n') |
| 334 | i += 2; |
| 335 | else |
| 336 | i++; |
| 337 | if (keepends) |
| 338 | eol = i; |
| 339 | } |
| 340 | _STRINGLIB_SPLIT_APPEND(data, j, eol); |
| 341 | j = i; |
| 342 | } |
| 343 | if (j < len) { |
| 344 | _STRINGLIB_SPLIT_APPEND(data, j, len); |
| 345 | } |
| 346 | |
| 347 | return list; |
| 348 | |
| 349 | onError: |
| 350 | Py_XDECREF(list); |
| 351 | return NULL; |
| 352 | } |
| 353 | |
| 354 | #undef _STRINGLIB_SPLIT_APPEND |
| 355 | |