Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 1 | /* NOTE: this API is -ONLY- for use with single byte character strings. */ |
| 2 | /* Do not use it with Unicode. */ |
| 3 | |
| 4 | #include "bytes_methods.h" |
| 5 | |
/* Fallback for includers that did not set STRINGLIB_MUTABLE before pulling
 * in this file: emit a compile-time warning and treat the type as immutable
 * (0).  The #if STRINGLIB_MUTABLE tests further down depend on this macro
 * being defined. */
#if !defined(STRINGLIB_MUTABLE)
#  warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#  define STRINGLIB_MUTABLE 0
#endif
| 10 | |
| 11 | /* the more complicated methods. parts of these should be pulled out into the |
| 12 | shared code in bytes_methods.c to cut down on duplicate code bloat. */ |
| 13 | |
| 14 | PyDoc_STRVAR(expandtabs__doc__, |
Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 15 | "B.expandtabs([tabsize]) -> copy of B\n\ |
Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 16 | \n\ |
| 17 | Return a copy of B where all tab characters are expanded using spaces.\n\ |
| 18 | If tabsize is not given, a tab size of 8 characters is assumed."); |
| 19 | |
| 20 | static PyObject* |
| 21 | stringlib_expandtabs(PyObject *self, PyObject *args) |
| 22 | { |
| 23 | const char *e, *p; |
| 24 | char *q; |
| 25 | Py_ssize_t i, j, old_j; |
| 26 | PyObject *u; |
| 27 | int tabsize = 8; |
| 28 | |
| 29 | if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) |
| 30 | return NULL; |
| 31 | |
| 32 | /* First pass: determine size of output string */ |
| 33 | i = j = old_j = 0; |
| 34 | e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); |
| 35 | for (p = STRINGLIB_STR(self); p < e; p++) |
| 36 | if (*p == '\t') { |
| 37 | if (tabsize > 0) { |
| 38 | j += tabsize - (j % tabsize); |
| 39 | /* XXX: this depends on a signed integer overflow to < 0 */ |
| 40 | /* C compilers, including gcc, do -NOT- guarantee this. */ |
| 41 | if (old_j > j) { |
| 42 | PyErr_SetString(PyExc_OverflowError, |
| 43 | "result is too long"); |
| 44 | return NULL; |
| 45 | } |
| 46 | old_j = j; |
| 47 | } |
| 48 | } |
| 49 | else { |
| 50 | j++; |
| 51 | if (*p == '\n' || *p == '\r') { |
| 52 | i += j; |
| 53 | old_j = j = 0; |
| 54 | /* XXX: this depends on a signed integer overflow to < 0 */ |
| 55 | /* C compilers, including gcc, do -NOT- guarantee this. */ |
| 56 | if (i < 0) { |
| 57 | PyErr_SetString(PyExc_OverflowError, |
| 58 | "result is too long"); |
| 59 | return NULL; |
| 60 | } |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | if ((i + j) < 0) { |
| 65 | /* XXX: this depends on a signed integer overflow to < 0 */ |
| 66 | /* C compilers, including gcc, do -NOT- guarantee this. */ |
| 67 | PyErr_SetString(PyExc_OverflowError, "result is too long"); |
| 68 | return NULL; |
| 69 | } |
| 70 | |
| 71 | /* Second pass: create output string and fill it */ |
| 72 | u = STRINGLIB_NEW(NULL, i + j); |
| 73 | if (!u) |
| 74 | return NULL; |
| 75 | |
| 76 | j = 0; |
| 77 | q = STRINGLIB_STR(u); |
| 78 | |
| 79 | for (p = STRINGLIB_STR(self); p < e; p++) |
| 80 | if (*p == '\t') { |
| 81 | if (tabsize > 0) { |
| 82 | i = tabsize - (j % tabsize); |
| 83 | j += i; |
| 84 | while (i--) |
| 85 | *q++ = ' '; |
| 86 | } |
| 87 | } |
| 88 | else { |
| 89 | j++; |
| 90 | *q++ = *p; |
| 91 | if (*p == '\n' || *p == '\r') |
| 92 | j = 0; |
| 93 | } |
| 94 | |
| 95 | return u; |
| 96 | } |
| 97 | |
| 98 | Py_LOCAL_INLINE(PyObject *) |
| 99 | pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) |
| 100 | { |
| 101 | PyObject *u; |
| 102 | |
| 103 | if (left < 0) |
| 104 | left = 0; |
| 105 | if (right < 0) |
| 106 | right = 0; |
| 107 | |
| 108 | if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { |
| 109 | #if STRINGLIB_MUTABLE |
| 110 | /* We're defined as returning a copy; If the object is mutable |
| 111 | * that means we must make an identical copy. */ |
| 112 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 113 | #else |
| 114 | Py_INCREF(self); |
| 115 | return (PyObject *)self; |
| 116 | #endif /* STRINGLIB_MUTABLE */ |
| 117 | } |
| 118 | |
| 119 | u = STRINGLIB_NEW(NULL, |
| 120 | left + STRINGLIB_LEN(self) + right); |
| 121 | if (u) { |
| 122 | if (left) |
| 123 | memset(STRINGLIB_STR(u), fill, left); |
| 124 | Py_MEMCPY(STRINGLIB_STR(u) + left, |
| 125 | STRINGLIB_STR(self), |
| 126 | STRINGLIB_LEN(self)); |
| 127 | if (right) |
| 128 | memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), |
| 129 | fill, right); |
| 130 | } |
| 131 | |
| 132 | return u; |
| 133 | } |
| 134 | |
| 135 | PyDoc_STRVAR(ljust__doc__, |
Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 136 | "B.ljust(width[, fillchar]) -> copy of B\n" |
Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 137 | "\n" |
| 138 | "Return B left justified in a string of length width. Padding is\n" |
| 139 | "done using the specified fill character (default is a space)."); |
| 140 | |
| 141 | static PyObject * |
| 142 | stringlib_ljust(PyObject *self, PyObject *args) |
| 143 | { |
| 144 | Py_ssize_t width; |
| 145 | char fillchar = ' '; |
| 146 | |
| 147 | if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) |
| 148 | return NULL; |
| 149 | |
| 150 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
| 151 | #if STRINGLIB_MUTABLE |
| 152 | /* We're defined as returning a copy; If the object is mutable |
| 153 | * that means we must make an identical copy. */ |
| 154 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 155 | #else |
| 156 | Py_INCREF(self); |
| 157 | return (PyObject*) self; |
| 158 | #endif |
| 159 | } |
| 160 | |
| 161 | return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); |
| 162 | } |
| 163 | |
| 164 | |
| 165 | PyDoc_STRVAR(rjust__doc__, |
Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 166 | "B.rjust(width[, fillchar]) -> copy of B\n" |
Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 167 | "\n" |
| 168 | "Return B right justified in a string of length width. Padding is\n" |
| 169 | "done using the specified fill character (default is a space)"); |
| 170 | |
| 171 | static PyObject * |
| 172 | stringlib_rjust(PyObject *self, PyObject *args) |
| 173 | { |
| 174 | Py_ssize_t width; |
| 175 | char fillchar = ' '; |
| 176 | |
| 177 | if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) |
| 178 | return NULL; |
| 179 | |
| 180 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
| 181 | #if STRINGLIB_MUTABLE |
| 182 | /* We're defined as returning a copy; If the object is mutable |
| 183 | * that means we must make an identical copy. */ |
| 184 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 185 | #else |
| 186 | Py_INCREF(self); |
| 187 | return (PyObject*) self; |
| 188 | #endif |
| 189 | } |
| 190 | |
| 191 | return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); |
| 192 | } |
| 193 | |
| 194 | |
| 195 | PyDoc_STRVAR(center__doc__, |
Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 196 | "B.center(width[, fillchar]) -> copy of B\n" |
Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 197 | "\n" |
Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 198 | "Return B centered in a string of length width. Padding is\n" |
| 199 | "done using the specified fill character (default is a space)."); |
Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 200 | |
| 201 | static PyObject * |
| 202 | stringlib_center(PyObject *self, PyObject *args) |
| 203 | { |
| 204 | Py_ssize_t marg, left; |
| 205 | Py_ssize_t width; |
| 206 | char fillchar = ' '; |
| 207 | |
| 208 | if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) |
| 209 | return NULL; |
| 210 | |
| 211 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { |
| 212 | #if STRINGLIB_MUTABLE |
| 213 | /* We're defined as returning a copy; If the object is mutable |
| 214 | * that means we must make an identical copy. */ |
| 215 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 216 | #else |
| 217 | Py_INCREF(self); |
| 218 | return (PyObject*) self; |
| 219 | #endif |
| 220 | } |
| 221 | |
| 222 | marg = width - STRINGLIB_LEN(self); |
| 223 | left = marg / 2 + (marg & width & 1); |
| 224 | |
| 225 | return pad(self, left, marg - left, fillchar); |
| 226 | } |
| 227 | |
| 228 | PyDoc_STRVAR(zfill__doc__, |
Guido van Rossum | 98297ee | 2007-11-06 21:34:58 +0000 | [diff] [blame] | 229 | "B.zfill(width) -> copy of B\n" |
Gregory P. Smith | 60d241f | 2007-10-16 06:31:30 +0000 | [diff] [blame] | 230 | "\n" |
| 231 | "Pad a numeric string B with zeros on the left, to fill a field\n" |
| 232 | "of the specified width. B is never truncated."); |
| 233 | |
| 234 | static PyObject * |
| 235 | stringlib_zfill(PyObject *self, PyObject *args) |
| 236 | { |
| 237 | Py_ssize_t fill; |
| 238 | PyObject *s; |
| 239 | char *p; |
| 240 | Py_ssize_t width; |
| 241 | |
| 242 | if (!PyArg_ParseTuple(args, "n:zfill", &width)) |
| 243 | return NULL; |
| 244 | |
| 245 | if (STRINGLIB_LEN(self) >= width) { |
| 246 | if (STRINGLIB_CHECK_EXACT(self)) { |
| 247 | #if STRINGLIB_MUTABLE |
| 248 | /* We're defined as returning a copy; If the object is mutable |
| 249 | * that means we must make an identical copy. */ |
| 250 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
| 251 | #else |
| 252 | Py_INCREF(self); |
| 253 | return (PyObject*) self; |
| 254 | #endif |
| 255 | } |
| 256 | else |
| 257 | return STRINGLIB_NEW( |
| 258 | STRINGLIB_STR(self), |
| 259 | STRINGLIB_LEN(self) |
| 260 | ); |
| 261 | } |
| 262 | |
| 263 | fill = width - STRINGLIB_LEN(self); |
| 264 | |
| 265 | s = pad(self, fill, 0, '0'); |
| 266 | |
| 267 | if (s == NULL) |
| 268 | return NULL; |
| 269 | |
| 270 | p = STRINGLIB_STR(s); |
| 271 | if (p[fill] == '+' || p[fill] == '-') { |
| 272 | /* move sign to beginning of string */ |
| 273 | p[0] = p[fill]; |
| 274 | p[fill] = '0'; |
| 275 | } |
| 276 | |
| 277 | return (PyObject*) s; |
| 278 | } |
| 279 | |
| 280 | |
| 281 | #define _STRINGLIB_SPLIT_APPEND(data, left, right) \ |
| 282 | str = STRINGLIB_NEW((data) + (left), \ |
| 283 | (right) - (left)); \ |
| 284 | if (str == NULL) \ |
| 285 | goto onError; \ |
| 286 | if (PyList_Append(list, str)) { \ |
| 287 | Py_DECREF(str); \ |
| 288 | goto onError; \ |
| 289 | } \ |
| 290 | else \ |
| 291 | Py_DECREF(str); |
| 292 | |
| 293 | PyDoc_STRVAR(splitlines__doc__, |
| 294 | "B.splitlines([keepends]) -> list of lines\n\ |
| 295 | \n\ |
| 296 | Return a list of the lines in B, breaking at line boundaries.\n\ |
| 297 | Line breaks are not included in the resulting list unless keepends\n\ |
| 298 | is given and true."); |
| 299 | |
| 300 | static PyObject* |
| 301 | stringlib_splitlines(PyObject *self, PyObject *args) |
| 302 | { |
| 303 | register Py_ssize_t i; |
| 304 | register Py_ssize_t j; |
| 305 | Py_ssize_t len; |
| 306 | int keepends = 0; |
| 307 | PyObject *list; |
| 308 | PyObject *str; |
| 309 | char *data; |
| 310 | |
| 311 | if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) |
| 312 | return NULL; |
| 313 | |
| 314 | data = STRINGLIB_STR(self); |
| 315 | len = STRINGLIB_LEN(self); |
| 316 | |
| 317 | /* This does not use the preallocated list because splitlines is |
| 318 | usually run with hundreds of newlines. The overhead of |
| 319 | switching between PyList_SET_ITEM and append causes about a |
| 320 | 2-3% slowdown for that common case. A smarter implementation |
| 321 | could move the if check out, so the SET_ITEMs are done first |
| 322 | and the appends only done when the prealloc buffer is full. |
| 323 | That's too much work for little gain.*/ |
| 324 | |
| 325 | list = PyList_New(0); |
| 326 | if (!list) |
| 327 | goto onError; |
| 328 | |
| 329 | for (i = j = 0; i < len; ) { |
| 330 | Py_ssize_t eol; |
| 331 | |
| 332 | /* Find a line and append it */ |
| 333 | while (i < len && data[i] != '\n' && data[i] != '\r') |
| 334 | i++; |
| 335 | |
| 336 | /* Skip the line break reading CRLF as one line break */ |
| 337 | eol = i; |
| 338 | if (i < len) { |
| 339 | if (data[i] == '\r' && i + 1 < len && |
| 340 | data[i+1] == '\n') |
| 341 | i += 2; |
| 342 | else |
| 343 | i++; |
| 344 | if (keepends) |
| 345 | eol = i; |
| 346 | } |
| 347 | _STRINGLIB_SPLIT_APPEND(data, j, eol); |
| 348 | j = i; |
| 349 | } |
| 350 | if (j < len) { |
| 351 | _STRINGLIB_SPLIT_APPEND(data, j, len); |
| 352 | } |
| 353 | |
| 354 | return list; |
| 355 | |
| 356 | onError: |
| 357 | Py_XDECREF(list); |
| 358 | return NULL; |
| 359 | } |
| 360 | |
| 361 | #undef _STRINGLIB_SPLIT_APPEND |
| 362 | |