| Guido van Rossum | f70e43a | 1991-02-19 12:39:46 +0000 | [diff] [blame] | 1 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2 | /* String object implementation */ | 
|  | 3 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 4 | #include "Python.h" | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 5 |  | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 6 | #include <ctype.h> | 
|  | 7 |  | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 8 | #ifdef COUNT_ALLOCS | 
|  | 9 | int null_strings, one_strings; | 
|  | 10 | #endif | 
|  | 11 |  | 
| Fred Drake | d5fadf7 | 2000-09-26 05:46:01 +0000 | [diff] [blame] | 12 | #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX) | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 13 | #define UCHAR_MAX 255 | 
|  | 14 | #endif | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 15 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 16 | static PyStringObject *characters[UCHAR_MAX + 1]; | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 17 | #ifndef DONT_SHARE_SHORT_STRINGS | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 18 | static PyStringObject *nullstring; | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 19 | #endif | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 20 |  | 
|  | 21 | /* | 
|  | 22 | Newsizedstringobject() and newstringobject() try in certain cases | 
|  | 23 | to share string objects.  When the size of the string is zero, | 
|  | 24 | these routines always return a pointer to the same string object; | 
|  | 25 | when the size is one, they return a pointer to an already existing | 
|  | 26 | object if the contents of the string is known.  For | 
|  | 27 | newstringobject() this is always the case, for | 
|  | 28 | newsizedstringobject() this is the case when the first argument is | 
|  | 29 | not NULL. | 
|  | 30 | A common practice to allocate a string and then fill it in or | 
|  | 31 | change it must be done carefully.  It is only allowed to change the | 
|  | 32 | contents of the string if the object was gotten from | 
|  | 33 | newsizedstringobject() with a NULL first argument, because in the | 
|  | 34 | future these routines may try to do even more sharing of objects. | 
|  | 35 | */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 36 | PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 37 | PyString_FromStringAndSize(const char *str, int size) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 38 | { | 
| Tim Peters | 9e897f4 | 2001-05-09 07:37:07 +0000 | [diff] [blame] | 39 | register PyStringObject *op; | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 40 | #ifndef DONT_SHARE_SHORT_STRINGS | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 41 | if (size == 0 && (op = nullstring) != NULL) { | 
|  | 42 | #ifdef COUNT_ALLOCS | 
|  | 43 | null_strings++; | 
|  | 44 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 45 | Py_INCREF(op); | 
|  | 46 | return (PyObject *)op; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 47 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 48 | if (size == 1 && str != NULL && | 
|  | 49 | (op = characters[*str & UCHAR_MAX]) != NULL) | 
|  | 50 | { | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 51 | #ifdef COUNT_ALLOCS | 
|  | 52 | one_strings++; | 
|  | 53 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 54 | Py_INCREF(op); | 
|  | 55 | return (PyObject *)op; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 56 | } | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 57 | #endif /* DONT_SHARE_SHORT_STRINGS */ | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 58 |  | 
|  | 59 | /* PyObject_NewVar is inlined */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 60 | op = (PyStringObject *) | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 61 | PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char)); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 62 | if (op == NULL) | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 63 | return PyErr_NoMemory(); | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 64 | PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 65 | #ifdef CACHE_HASH | 
|  | 66 | op->ob_shash = -1; | 
|  | 67 | #endif | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 68 | #ifdef INTERN_STRINGS | 
|  | 69 | op->ob_sinterned = NULL; | 
|  | 70 | #endif | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 71 | if (str != NULL) | 
|  | 72 | memcpy(op->ob_sval, str, size); | 
|  | 73 | op->ob_sval[size] = '\0'; | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 74 | #ifndef DONT_SHARE_SHORT_STRINGS | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 75 | if (size == 0) { | 
| Tim Peters | 9e897f4 | 2001-05-09 07:37:07 +0000 | [diff] [blame] | 76 | PyObject *t = (PyObject *)op; | 
|  | 77 | PyString_InternInPlace(&t); | 
| Tim Peters | 4862ab7 | 2001-05-09 08:43:21 +0000 | [diff] [blame] | 78 | op = (PyStringObject *)t; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 79 | nullstring = op; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 80 | Py_INCREF(op); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 81 | } else if (size == 1 && str != NULL) { | 
| Tim Peters | 9e897f4 | 2001-05-09 07:37:07 +0000 | [diff] [blame] | 82 | PyObject *t = (PyObject *)op; | 
|  | 83 | PyString_InternInPlace(&t); | 
| Tim Peters | 4862ab7 | 2001-05-09 08:43:21 +0000 | [diff] [blame] | 84 | op = (PyStringObject *)t; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 85 | characters[*str & UCHAR_MAX] = op; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 86 | Py_INCREF(op); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 87 | } | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 88 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 89 | return (PyObject *) op; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 90 | } | 
|  | 91 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 92 | PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 93 | PyString_FromString(const char *str) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 94 | { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 95 | register size_t size = strlen(str); | 
| Tim Peters | 9e897f4 | 2001-05-09 07:37:07 +0000 | [diff] [blame] | 96 | register PyStringObject *op; | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 97 | if (size > INT_MAX) { | 
|  | 98 | PyErr_SetString(PyExc_OverflowError, | 
|  | 99 | "string is too long for a Python string"); | 
|  | 100 | return NULL; | 
|  | 101 | } | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 102 | #ifndef DONT_SHARE_SHORT_STRINGS | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 103 | if (size == 0 && (op = nullstring) != NULL) { | 
|  | 104 | #ifdef COUNT_ALLOCS | 
|  | 105 | null_strings++; | 
|  | 106 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 107 | Py_INCREF(op); | 
|  | 108 | return (PyObject *)op; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 109 | } | 
|  | 110 | if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { | 
|  | 111 | #ifdef COUNT_ALLOCS | 
|  | 112 | one_strings++; | 
|  | 113 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 114 | Py_INCREF(op); | 
|  | 115 | return (PyObject *)op; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 116 | } | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 117 | #endif /* DONT_SHARE_SHORT_STRINGS */ | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 118 |  | 
|  | 119 | /* PyObject_NewVar is inlined */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 120 | op = (PyStringObject *) | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 121 | PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char)); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 122 | if (op == NULL) | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 123 | return PyErr_NoMemory(); | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 124 | PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 125 | #ifdef CACHE_HASH | 
|  | 126 | op->ob_shash = -1; | 
|  | 127 | #endif | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 128 | #ifdef INTERN_STRINGS | 
|  | 129 | op->ob_sinterned = NULL; | 
|  | 130 | #endif | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 131 | strcpy(op->ob_sval, str); | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 132 | #ifndef DONT_SHARE_SHORT_STRINGS | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 133 | if (size == 0) { | 
| Tim Peters | 9e897f4 | 2001-05-09 07:37:07 +0000 | [diff] [blame] | 134 | PyObject *t = (PyObject *)op; | 
|  | 135 | PyString_InternInPlace(&t); | 
| Tim Peters | 4862ab7 | 2001-05-09 08:43:21 +0000 | [diff] [blame] | 136 | op = (PyStringObject *)t; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 137 | nullstring = op; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 138 | Py_INCREF(op); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 139 | } else if (size == 1) { | 
| Tim Peters | 9e897f4 | 2001-05-09 07:37:07 +0000 | [diff] [blame] | 140 | PyObject *t = (PyObject *)op; | 
|  | 141 | PyString_InternInPlace(&t); | 
| Tim Peters | 4862ab7 | 2001-05-09 08:43:21 +0000 | [diff] [blame] | 142 | op = (PyStringObject *)t; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 143 | characters[*str & UCHAR_MAX] = op; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 144 | Py_INCREF(op); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 145 | } | 
| Sjoerd Mullender | 615194a | 1993-11-01 13:46:50 +0000 | [diff] [blame] | 146 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 147 | return (PyObject *) op; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 148 | } | 
|  | 149 |  | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 150 | PyObject *PyString_Decode(const char *s, | 
|  | 151 | int size, | 
|  | 152 | const char *encoding, | 
|  | 153 | const char *errors) | 
|  | 154 | { | 
|  | 155 | PyObject *buffer = NULL, *str; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 156 |  | 
|  | 157 | if (encoding == NULL) | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 158 | encoding = PyUnicode_GetDefaultEncoding(); | 
|  | 159 |  | 
|  | 160 | /* Decode via the codec registry */ | 
|  | 161 | buffer = PyBuffer_FromMemory((void *)s, size); | 
|  | 162 | if (buffer == NULL) | 
|  | 163 | goto onError; | 
|  | 164 | str = PyCodec_Decode(buffer, encoding, errors); | 
|  | 165 | if (str == NULL) | 
|  | 166 | goto onError; | 
|  | 167 | /* Convert Unicode to a string using the default encoding */ | 
|  | 168 | if (PyUnicode_Check(str)) { | 
|  | 169 | PyObject *temp = str; | 
|  | 170 | str = PyUnicode_AsEncodedString(str, NULL, NULL); | 
|  | 171 | Py_DECREF(temp); | 
|  | 172 | if (str == NULL) | 
|  | 173 | goto onError; | 
|  | 174 | } | 
|  | 175 | if (!PyString_Check(str)) { | 
|  | 176 | PyErr_Format(PyExc_TypeError, | 
| Andrew M. Kuchling | bd9848d | 2000-07-12 02:58:28 +0000 | [diff] [blame] | 177 | "decoder did not return a string object (type=%.400s)", | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 178 | str->ob_type->tp_name); | 
|  | 179 | Py_DECREF(str); | 
|  | 180 | goto onError; | 
|  | 181 | } | 
|  | 182 | Py_DECREF(buffer); | 
|  | 183 | return str; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 184 |  | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 185 | onError: | 
|  | 186 | Py_XDECREF(buffer); | 
|  | 187 | return NULL; | 
|  | 188 | } | 
|  | 189 |  | 
|  | 190 | PyObject *PyString_Encode(const char *s, | 
|  | 191 | int size, | 
|  | 192 | const char *encoding, | 
|  | 193 | const char *errors) | 
|  | 194 | { | 
|  | 195 | PyObject *v, *str; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 196 |  | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 197 | str = PyString_FromStringAndSize(s, size); | 
|  | 198 | if (str == NULL) | 
|  | 199 | return NULL; | 
|  | 200 | v = PyString_AsEncodedString(str, encoding, errors); | 
|  | 201 | Py_DECREF(str); | 
|  | 202 | return v; | 
|  | 203 | } | 
|  | 204 |  | 
|  | 205 | PyObject *PyString_AsEncodedString(PyObject *str, | 
|  | 206 | const char *encoding, | 
|  | 207 | const char *errors) | 
|  | 208 | { | 
|  | 209 | PyObject *v; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 210 |  | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 211 | if (!PyString_Check(str)) { | 
|  | 212 | PyErr_BadArgument(); | 
|  | 213 | goto onError; | 
|  | 214 | } | 
|  | 215 |  | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 216 | if (encoding == NULL) | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 217 | encoding = PyUnicode_GetDefaultEncoding(); | 
|  | 218 |  | 
|  | 219 | /* Encode via the codec registry */ | 
|  | 220 | v = PyCodec_Encode(str, encoding, errors); | 
|  | 221 | if (v == NULL) | 
|  | 222 | goto onError; | 
|  | 223 | /* Convert Unicode to a string using the default encoding */ | 
|  | 224 | if (PyUnicode_Check(v)) { | 
|  | 225 | PyObject *temp = v; | 
|  | 226 | v = PyUnicode_AsEncodedString(v, NULL, NULL); | 
|  | 227 | Py_DECREF(temp); | 
|  | 228 | if (v == NULL) | 
|  | 229 | goto onError; | 
|  | 230 | } | 
|  | 231 | if (!PyString_Check(v)) { | 
|  | 232 | PyErr_Format(PyExc_TypeError, | 
|  | 233 | "encoder did not return a string object (type=%.400s)", | 
|  | 234 | v->ob_type->tp_name); | 
|  | 235 | Py_DECREF(v); | 
|  | 236 | goto onError; | 
|  | 237 | } | 
|  | 238 | return v; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 239 |  | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 240 | onError: | 
|  | 241 | return NULL; | 
|  | 242 | } | 
|  | 243 |  | 
| Guido van Rossum | 234f942 | 1993-06-17 12:35:49 +0000 | [diff] [blame] | 244 | static void | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 245 | string_dealloc(PyObject *op) | 
| Guido van Rossum | 719f5fa | 1992-03-27 17:31:02 +0000 | [diff] [blame] | 246 | { | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 247 | PyObject_DEL(op); | 
| Guido van Rossum | 719f5fa | 1992-03-27 17:31:02 +0000 | [diff] [blame] | 248 | } | 
|  | 249 |  | 
| Marc-André Lemburg | d1ba443 | 2000-09-19 21:04:18 +0000 | [diff] [blame] | 250 | static int | 
|  | 251 | string_getsize(register PyObject *op) | 
|  | 252 | { | 
|  | 253 | char *s; | 
|  | 254 | int len; | 
|  | 255 | if (PyString_AsStringAndSize(op, &s, &len)) | 
|  | 256 | return -1; | 
|  | 257 | return len; | 
|  | 258 | } | 
|  | 259 |  | 
|  | 260 | static /*const*/ char * | 
|  | 261 | string_getbuffer(register PyObject *op) | 
|  | 262 | { | 
|  | 263 | char *s; | 
|  | 264 | int len; | 
|  | 265 | if (PyString_AsStringAndSize(op, &s, &len)) | 
|  | 266 | return NULL; | 
|  | 267 | return s; | 
|  | 268 | } | 
|  | 269 |  | 
| Guido van Rossum | d7047b3 | 1995-01-02 19:07:15 +0000 | [diff] [blame] | 270 | int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 271 | PyString_Size(register PyObject *op) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 272 | { | 
| Marc-André Lemburg | d1ba443 | 2000-09-19 21:04:18 +0000 | [diff] [blame] | 273 | if (!PyString_Check(op)) | 
|  | 274 | return string_getsize(op); | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 275 | return ((PyStringObject *)op) -> ob_size; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 276 | } | 
|  | 277 |  | 
|  | 278 | /*const*/ char * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 279 | PyString_AsString(register PyObject *op) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 280 | { | 
| Marc-André Lemburg | d1ba443 | 2000-09-19 21:04:18 +0000 | [diff] [blame] | 281 | if (!PyString_Check(op)) | 
|  | 282 | return string_getbuffer(op); | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 283 | return ((PyStringObject *)op) -> ob_sval; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 284 | } | 
|  | 285 |  | 
| Marc-André Lemburg | d1ba443 | 2000-09-19 21:04:18 +0000 | [diff] [blame] | 286 | /* Internal API needed by PyString_AsStringAndSize(): */ | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 287 | extern | 
| Marc-André Lemburg | d1ba443 | 2000-09-19 21:04:18 +0000 | [diff] [blame] | 288 | PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, | 
|  | 289 | const char *errors); | 
|  | 290 |  | 
|  | 291 | int | 
|  | 292 | PyString_AsStringAndSize(register PyObject *obj, | 
|  | 293 | register char **s, | 
|  | 294 | register int *len) | 
|  | 295 | { | 
|  | 296 | if (s == NULL) { | 
|  | 297 | PyErr_BadInternalCall(); | 
|  | 298 | return -1; | 
|  | 299 | } | 
|  | 300 |  | 
|  | 301 | if (!PyString_Check(obj)) { | 
|  | 302 | if (PyUnicode_Check(obj)) { | 
|  | 303 | obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); | 
|  | 304 | if (obj == NULL) | 
|  | 305 | return -1; | 
|  | 306 | } | 
|  | 307 | else { | 
|  | 308 | PyErr_Format(PyExc_TypeError, | 
|  | 309 | "expected string or Unicode object, " | 
|  | 310 | "%.200s found", obj->ob_type->tp_name); | 
|  | 311 | return -1; | 
|  | 312 | } | 
|  | 313 | } | 
|  | 314 |  | 
|  | 315 | *s = PyString_AS_STRING(obj); | 
|  | 316 | if (len != NULL) | 
|  | 317 | *len = PyString_GET_SIZE(obj); | 
|  | 318 | else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) { | 
|  | 319 | PyErr_SetString(PyExc_TypeError, | 
|  | 320 | "expected string without null bytes"); | 
|  | 321 | return -1; | 
|  | 322 | } | 
|  | 323 | return 0; | 
|  | 324 | } | 
|  | 325 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 326 | /* Methods */ | 
|  | 327 |  | 
| Guido van Rossum | bcaa31c | 1991-06-07 22:58:57 +0000 | [diff] [blame] | 328 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 329 | string_print(PyStringObject *op, FILE *fp, int flags) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 330 | { | 
|  | 331 | int i; | 
|  | 332 | char c; | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 333 | int quote; | 
| Guido van Rossum | bcaa31c | 1991-06-07 22:58:57 +0000 | [diff] [blame] | 334 | /* XXX Ought to check for interrupts when writing long strings */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 335 | if (flags & Py_PRINT_RAW) { | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 336 | fwrite(op->ob_sval, 1, (int) op->ob_size, fp); | 
| Guido van Rossum | bcaa31c | 1991-06-07 22:58:57 +0000 | [diff] [blame] | 337 | return 0; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 338 | } | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 339 |  | 
| Thomas Wouters | 7e47402 | 2000-07-16 12:04:32 +0000 | [diff] [blame] | 340 | /* figure out which quote to use; single is preferred */ | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 341 | quote = '\''; | 
|  | 342 | if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"')) | 
|  | 343 | quote = '"'; | 
|  | 344 |  | 
|  | 345 | fputc(quote, fp); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 346 | for (i = 0; i < op->ob_size; i++) { | 
|  | 347 | c = op->ob_sval[i]; | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 348 | if (c == quote || c == '\\') | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 349 | fprintf(fp, "\\%c", c); | 
| Ka-Ping Yee | fa004ad | 2001-01-24 17:19:08 +0000 | [diff] [blame] | 350 | else if (c == '\t') | 
|  | 351 | fprintf(fp, "\\t"); | 
|  | 352 | else if (c == '\n') | 
|  | 353 | fprintf(fp, "\\n"); | 
|  | 354 | else if (c == '\r') | 
|  | 355 | fprintf(fp, "\\r"); | 
|  | 356 | else if (c < ' ' || c >= 0x7f) | 
|  | 357 | fprintf(fp, "\\x%02x", c & 0xff); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 358 | else | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 359 | fputc(c, fp); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 360 | } | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 361 | fputc(quote, fp); | 
| Guido van Rossum | bcaa31c | 1991-06-07 22:58:57 +0000 | [diff] [blame] | 362 | return 0; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 363 | } | 
|  | 364 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 365 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 366 | string_repr(register PyStringObject *op) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 367 | { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 368 | size_t newsize = 2 + 4 * op->ob_size * sizeof(char); | 
|  | 369 | PyObject *v; | 
|  | 370 | if (newsize > INT_MAX) { | 
|  | 371 | PyErr_SetString(PyExc_OverflowError, | 
|  | 372 | "string is too large to make repr"); | 
|  | 373 | } | 
|  | 374 | v = PyString_FromStringAndSize((char *)NULL, newsize); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 375 | if (v == NULL) { | 
| Guido van Rossum | bcaa31c | 1991-06-07 22:58:57 +0000 | [diff] [blame] | 376 | return NULL; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 377 | } | 
|  | 378 | else { | 
|  | 379 | register int i; | 
|  | 380 | register char c; | 
|  | 381 | register char *p; | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 382 | int quote; | 
|  | 383 |  | 
| Thomas Wouters | 7e47402 | 2000-07-16 12:04:32 +0000 | [diff] [blame] | 384 | /* figure out which quote to use; single is preferred */ | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 385 | quote = '\''; | 
|  | 386 | if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"')) | 
|  | 387 | quote = '"'; | 
|  | 388 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 389 | p = ((PyStringObject *)v)->ob_sval; | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 390 | *p++ = quote; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 391 | for (i = 0; i < op->ob_size; i++) { | 
|  | 392 | c = op->ob_sval[i]; | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 393 | if (c == quote || c == '\\') | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 394 | *p++ = '\\', *p++ = c; | 
| Ka-Ping Yee | fa004ad | 2001-01-24 17:19:08 +0000 | [diff] [blame] | 395 | else if (c == '\t') | 
|  | 396 | *p++ = '\\', *p++ = 't'; | 
|  | 397 | else if (c == '\n') | 
|  | 398 | *p++ = '\\', *p++ = 'n'; | 
|  | 399 | else if (c == '\r') | 
|  | 400 | *p++ = '\\', *p++ = 'r'; | 
|  | 401 | else if (c < ' ' || c >= 0x7f) { | 
|  | 402 | sprintf(p, "\\x%02x", c & 0xff); | 
|  | 403 | p += 4; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 404 | } | 
|  | 405 | else | 
|  | 406 | *p++ = c; | 
|  | 407 | } | 
| Guido van Rossum | 444fc7c | 1993-10-26 15:25:16 +0000 | [diff] [blame] | 408 | *p++ = quote; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 409 | *p = '\0'; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 410 | _PyString_Resize( | 
|  | 411 | &v, (int) (p - ((PyStringObject *)v)->ob_sval)); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 412 | return v; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 413 | } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 414 | } | 
|  | 415 |  | 
| Guido van Rossum | 189f1df | 2001-05-01 16:51:53 +0000 | [diff] [blame] | 416 | static PyObject * | 
|  | 417 | string_str(PyObject *s) | 
|  | 418 | { | 
|  | 419 | Py_INCREF(s); | 
|  | 420 | return s; | 
|  | 421 | } | 
|  | 422 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 423 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 424 | string_length(PyStringObject *a) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 425 | { | 
|  | 426 | return a->ob_size; | 
|  | 427 | } | 
|  | 428 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 429 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 430 | string_concat(register PyStringObject *a, register PyObject *bb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 431 | { | 
|  | 432 | register unsigned int size; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 433 | register PyStringObject *op; | 
|  | 434 | if (!PyString_Check(bb)) { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 435 | if (PyUnicode_Check(bb)) | 
|  | 436 | return PyUnicode_Concat((PyObject *)a, bb); | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 437 | PyErr_Format(PyExc_TypeError, | 
| Fred Drake | b6a9ada | 2000-06-01 03:12:13 +0000 | [diff] [blame] | 438 | "cannot add type \"%.200s\" to string", | 
|  | 439 | bb->ob_type->tp_name); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 440 | return NULL; | 
|  | 441 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 442 | #define b ((PyStringObject *)bb) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 443 | /* Optimize cases with empty left or right operand */ | 
|  | 444 | if (a->ob_size == 0) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 445 | Py_INCREF(bb); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 446 | return bb; | 
|  | 447 | } | 
|  | 448 | if (b->ob_size == 0) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 449 | Py_INCREF(a); | 
|  | 450 | return (PyObject *)a; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 451 | } | 
|  | 452 | size = a->ob_size + b->ob_size; | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 453 | /* PyObject_NewVar is inlined */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 454 | op = (PyStringObject *) | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 455 | PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char)); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 456 | if (op == NULL) | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 457 | return PyErr_NoMemory(); | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 458 | PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 459 | #ifdef CACHE_HASH | 
|  | 460 | op->ob_shash = -1; | 
|  | 461 | #endif | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 462 | #ifdef INTERN_STRINGS | 
|  | 463 | op->ob_sinterned = NULL; | 
|  | 464 | #endif | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 465 | memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); | 
|  | 466 | memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size); | 
|  | 467 | op->ob_sval[size] = '\0'; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 468 | return (PyObject *) op; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 469 | #undef b | 
|  | 470 | } | 
|  | 471 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 472 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 473 | string_repeat(register PyStringObject *a, register int n) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 474 | { | 
|  | 475 | register int i; | 
| Guido van Rossum | 2095d24 | 1997-04-09 19:41:24 +0000 | [diff] [blame] | 476 | register int size; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 477 | register PyStringObject *op; | 
| Tim Peters | 8f42246 | 2000-09-09 06:13:41 +0000 | [diff] [blame] | 478 | size_t nbytes; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 479 | if (n < 0) | 
|  | 480 | n = 0; | 
| Tim Peters | 8f42246 | 2000-09-09 06:13:41 +0000 | [diff] [blame] | 481 | /* watch out for overflows:  the size can overflow int, | 
|  | 482 | * and the # of bytes needed can overflow size_t | 
|  | 483 | */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 484 | size = a->ob_size * n; | 
| Tim Peters | 8f42246 | 2000-09-09 06:13:41 +0000 | [diff] [blame] | 485 | if (n && size / n != a->ob_size) { | 
|  | 486 | PyErr_SetString(PyExc_OverflowError, | 
|  | 487 | "repeated string is too long"); | 
|  | 488 | return NULL; | 
|  | 489 | } | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 490 | if (size == a->ob_size) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 491 | Py_INCREF(a); | 
|  | 492 | return (PyObject *)a; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 493 | } | 
| Tim Peters | 8f42246 | 2000-09-09 06:13:41 +0000 | [diff] [blame] | 494 | nbytes = size * sizeof(char); | 
|  | 495 | if (nbytes / sizeof(char) != (size_t)size || | 
|  | 496 | nbytes + sizeof(PyStringObject) <= nbytes) { | 
|  | 497 | PyErr_SetString(PyExc_OverflowError, | 
|  | 498 | "repeated string is too long"); | 
|  | 499 | return NULL; | 
|  | 500 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 501 | op = (PyStringObject *) | 
| Tim Peters | 8f42246 | 2000-09-09 06:13:41 +0000 | [diff] [blame] | 502 | PyObject_MALLOC(sizeof(PyStringObject) + nbytes); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 503 | if (op == NULL) | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 504 | return PyErr_NoMemory(); | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 505 | PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 506 | #ifdef CACHE_HASH | 
|  | 507 | op->ob_shash = -1; | 
|  | 508 | #endif | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 509 | #ifdef INTERN_STRINGS | 
|  | 510 | op->ob_sinterned = NULL; | 
|  | 511 | #endif | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 512 | for (i = 0; i < size; i += a->ob_size) | 
|  | 513 | memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size); | 
|  | 514 | op->ob_sval[size] = '\0'; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 515 | return (PyObject *) op; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 516 | } | 
|  | 517 |  | 
|  | 518 | /* String slice a[i:j] consists of characters a[i] ... a[j-1] */ | 
|  | 519 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 520 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 521 | string_slice(register PyStringObject *a, register int i, register int j) | 
|  | 522 | /* j -- may be negative! */ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 523 | { | 
|  | 524 | if (i < 0) | 
|  | 525 | i = 0; | 
|  | 526 | if (j < 0) | 
|  | 527 | j = 0; /* Avoid signed/unsigned bug in next line */ | 
|  | 528 | if (j > a->ob_size) | 
|  | 529 | j = a->ob_size; | 
|  | 530 | if (i == 0 && j == a->ob_size) { /* It's the same as a */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 531 | Py_INCREF(a); | 
|  | 532 | return (PyObject *)a; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 533 | } | 
|  | 534 | if (j < i) | 
|  | 535 | j = i; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 536 | return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 537 | } | 
|  | 538 |  | 
| Guido van Rossum | 9284a57 | 2000-03-07 15:53:43 +0000 | [diff] [blame] | 539 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 540 | string_contains(PyObject *a, PyObject *el) | 
| Guido van Rossum | 9284a57 | 2000-03-07 15:53:43 +0000 | [diff] [blame] | 541 | { | 
|  | 542 | register char *s, *end; | 
|  | 543 | register char c; | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 544 | if (PyUnicode_Check(el)) | 
| Guido van Rossum | 96a45ad | 2000-03-13 15:56:08 +0000 | [diff] [blame] | 545 | return PyUnicode_Contains(a, el); | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 546 | if (!PyString_Check(el) || PyString_Size(el) != 1) { | 
| Guido van Rossum | 9284a57 | 2000-03-07 15:53:43 +0000 | [diff] [blame] | 547 | PyErr_SetString(PyExc_TypeError, | 
| Andrew M. Kuchling | cb95a14 | 2000-06-09 14:04:53 +0000 | [diff] [blame] | 548 | "'in <string>' requires character as left operand"); | 
| Guido van Rossum | 9284a57 | 2000-03-07 15:53:43 +0000 | [diff] [blame] | 549 | return -1; | 
|  | 550 | } | 
|  | 551 | c = PyString_AsString(el)[0]; | 
|  | 552 | s = PyString_AsString(a); | 
|  | 553 | end = s + PyString_Size(a); | 
|  | 554 | while (s < end) { | 
|  | 555 | if (c == *s++) | 
|  | 556 | return 1; | 
|  | 557 | } | 
|  | 558 | return 0; | 
|  | 559 | } | 
|  | 560 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 561 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 562 | string_item(PyStringObject *a, register int i) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 563 | { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 564 | PyObject *v; | 
| Tim Peters | 5b4d477 | 2001-05-08 22:33:50 +0000 | [diff] [blame] | 565 | char *pchar; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 566 | if (i < 0 || i >= a->ob_size) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 567 | PyErr_SetString(PyExc_IndexError, "string index out of range"); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 568 | return NULL; | 
|  | 569 | } | 
| Tim Peters | 5b4d477 | 2001-05-08 22:33:50 +0000 | [diff] [blame] | 570 | pchar = a->ob_sval + i; | 
| Tim Peters | cf5ad5d | 2001-05-09 00:24:55 +0000 | [diff] [blame] | 571 | v = (PyObject *)characters[*pchar & UCHAR_MAX]; | 
| Tim Peters | 5b4d477 | 2001-05-08 22:33:50 +0000 | [diff] [blame] | 572 | if (v == NULL) | 
|  | 573 | v = PyString_FromStringAndSize(pchar, 1); | 
| Tim Peters | b4bbcd7 | 2001-05-09 00:31:40 +0000 | [diff] [blame] | 574 | else { | 
|  | 575 | #ifdef COUNT_ALLOCS | 
|  | 576 | one_strings++; | 
|  | 577 | #endif | 
| Tim Peters | cf5ad5d | 2001-05-09 00:24:55 +0000 | [diff] [blame] | 578 | Py_INCREF(v); | 
| Tim Peters | b4bbcd7 | 2001-05-09 00:31:40 +0000 | [diff] [blame] | 579 | } | 
| Guido van Rossum | daa8bb3 | 1991-04-04 10:48:33 +0000 | [diff] [blame] | 580 | return v; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 581 | } | 
|  | 582 |  | 
|  | 583 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 584 | string_compare(PyStringObject *a, PyStringObject *b) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 585 | { | 
| Guido van Rossum | 253919f | 1991-02-13 23:18:39 +0000 | [diff] [blame] | 586 | int len_a = a->ob_size, len_b = b->ob_size; | 
|  | 587 | int min_len = (len_a < len_b) ? len_a : len_b; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 588 | int cmp; | 
|  | 589 | if (min_len > 0) { | 
| Guido van Rossum | fde7a75 | 1996-10-23 14:19:40 +0000 | [diff] [blame] | 590 | cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 591 | if (cmp == 0) | 
|  | 592 | cmp = memcmp(a->ob_sval, b->ob_sval, min_len); | 
|  | 593 | if (cmp != 0) | 
|  | 594 | return cmp; | 
|  | 595 | } | 
| Guido van Rossum | 253919f | 1991-02-13 23:18:39 +0000 | [diff] [blame] | 596 | return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 597 | } | 
|  | 598 |  | 
| Guido van Rossum | 9bfef44 | 1993-03-29 10:43:31 +0000 | [diff] [blame] | 599 | static long | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 600 | string_hash(PyStringObject *a) | 
| Guido van Rossum | 9bfef44 | 1993-03-29 10:43:31 +0000 | [diff] [blame] | 601 | { | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 602 | register int len; | 
|  | 603 | register unsigned char *p; | 
|  | 604 | register long x; | 
|  | 605 |  | 
|  | 606 | #ifdef CACHE_HASH | 
|  | 607 | if (a->ob_shash != -1) | 
|  | 608 | return a->ob_shash; | 
| Guido van Rossum | 36b9f79 | 1997-02-14 16:29:22 +0000 | [diff] [blame] | 609 | #ifdef INTERN_STRINGS | 
|  | 610 | if (a->ob_sinterned != NULL) | 
|  | 611 | return (a->ob_shash = | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 612 | ((PyStringObject *)(a->ob_sinterned))->ob_shash); | 
| Guido van Rossum | 36b9f79 | 1997-02-14 16:29:22 +0000 | [diff] [blame] | 613 | #endif | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 614 | #endif | 
|  | 615 | len = a->ob_size; | 
|  | 616 | p = (unsigned char *) a->ob_sval; | 
|  | 617 | x = *p << 7; | 
| Guido van Rossum | 9bfef44 | 1993-03-29 10:43:31 +0000 | [diff] [blame] | 618 | while (--len >= 0) | 
| Guido van Rossum | eddcb3b | 1996-09-11 20:22:48 +0000 | [diff] [blame] | 619 | x = (1000003*x) ^ *p++; | 
| Guido van Rossum | 9bfef44 | 1993-03-29 10:43:31 +0000 | [diff] [blame] | 620 | x ^= a->ob_size; | 
|  | 621 | if (x == -1) | 
|  | 622 | x = -2; | 
| Sjoerd Mullender | 3bb8a05 | 1993-10-22 12:04:32 +0000 | [diff] [blame] | 623 | #ifdef CACHE_HASH | 
|  | 624 | a->ob_shash = x; | 
|  | 625 | #endif | 
| Guido van Rossum | 9bfef44 | 1993-03-29 10:43:31 +0000 | [diff] [blame] | 626 | return x; | 
|  | 627 | } | 
|  | 628 |  | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 629 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 630 | string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr) | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 631 | { | 
|  | 632 | if ( index != 0 ) { | 
| Guido van Rossum | 045e688 | 1997-09-08 18:30:11 +0000 | [diff] [blame] | 633 | PyErr_SetString(PyExc_SystemError, | 
| Guido van Rossum | 1db7070 | 1998-10-08 02:18:52 +0000 | [diff] [blame] | 634 | "accessing non-existent string segment"); | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 635 | return -1; | 
|  | 636 | } | 
|  | 637 | *ptr = (void *)self->ob_sval; | 
|  | 638 | return self->ob_size; | 
|  | 639 | } | 
|  | 640 |  | 
|  | 641 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 642 | string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr) | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 643 | { | 
| Guido van Rossum | 045e688 | 1997-09-08 18:30:11 +0000 | [diff] [blame] | 644 | PyErr_SetString(PyExc_TypeError, | 
| Guido van Rossum | 07d7800 | 1998-10-01 15:59:48 +0000 | [diff] [blame] | 645 | "Cannot use string as modifiable buffer"); | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 646 | return -1; | 
|  | 647 | } | 
|  | 648 |  | 
|  | 649 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 650 | string_buffer_getsegcount(PyStringObject *self, int *lenp) | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 651 | { | 
|  | 652 | if ( lenp ) | 
|  | 653 | *lenp = self->ob_size; | 
|  | 654 | return 1; | 
|  | 655 | } | 
|  | 656 |  | 
| Guido van Rossum | 1db7070 | 1998-10-08 02:18:52 +0000 | [diff] [blame] | 657 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 658 | string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr) | 
| Guido van Rossum | 1db7070 | 1998-10-08 02:18:52 +0000 | [diff] [blame] | 659 | { | 
|  | 660 | if ( index != 0 ) { | 
|  | 661 | PyErr_SetString(PyExc_SystemError, | 
|  | 662 | "accessing non-existent string segment"); | 
|  | 663 | return -1; | 
|  | 664 | } | 
|  | 665 | *ptr = self->ob_sval; | 
|  | 666 | return self->ob_size; | 
|  | 667 | } | 
|  | 668 |  | 
/* Sequence method table for strings.  The two assignment slots are 0:
 * this table provides no item or slice assignment.
 */
static PySequenceMethods string_as_sequence = {
	(inquiry)string_length, /*sq_length*/
	(binaryfunc)string_concat, /*sq_concat*/
	(intargfunc)string_repeat, /*sq_repeat*/
	(intargfunc)string_item, /*sq_item*/
	(intintargfunc)string_slice, /*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
	(objobjproc)string_contains /*sq_contains*/
};
|  | 679 |  | 
/* Buffer method table for strings: read access, (always failing) write
 * access, segment count, and character-buffer access, in that order.
 */
static PyBufferProcs string_as_buffer = {
	(getreadbufferproc)string_buffer_getreadbuf,
	(getwritebufferproc)string_buffer_getwritebuf,
	(getsegcountproc)string_buffer_getsegcount,
	(getcharbufferproc)string_buffer_getcharbuf,
};
|  | 686 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 687 |  | 
|  | 688 |  | 
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
 * helpers defined later in this file -- confirm against their callers.
 */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
|  | 692 |  | 
|  | 693 |  | 
|  | 694 | static PyObject * | 
| Tim Peters | c2e7da9 | 2000-07-09 08:02:21 +0000 | [diff] [blame] | 695 | split_whitespace(const char *s, int len, int maxsplit) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 696 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 697 | int i, j, err; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 698 | PyObject* item; | 
|  | 699 | PyObject *list = PyList_New(0); | 
|  | 700 |  | 
|  | 701 | if (list == NULL) | 
|  | 702 | return NULL; | 
|  | 703 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 704 | for (i = j = 0; i < len; ) { | 
|  | 705 | while (i < len && isspace(Py_CHARMASK(s[i]))) | 
|  | 706 | i++; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 707 | j = i; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 708 | while (i < len && !isspace(Py_CHARMASK(s[i]))) | 
|  | 709 | i++; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 710 | if (j < i) { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 711 | if (maxsplit-- <= 0) | 
|  | 712 | break; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 713 | item = PyString_FromStringAndSize(s+j, (int)(i-j)); | 
|  | 714 | if (item == NULL) | 
|  | 715 | goto finally; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 716 | err = PyList_Append(list, item); | 
|  | 717 | Py_DECREF(item); | 
|  | 718 | if (err < 0) | 
|  | 719 | goto finally; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 720 | while (i < len && isspace(Py_CHARMASK(s[i]))) | 
|  | 721 | i++; | 
|  | 722 | j = i; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 723 | } | 
|  | 724 | } | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 725 | if (j < len) { | 
|  | 726 | item = PyString_FromStringAndSize(s+j, (int)(len - j)); | 
|  | 727 | if (item == NULL) | 
|  | 728 | goto finally; | 
|  | 729 | err = PyList_Append(list, item); | 
|  | 730 | Py_DECREF(item); | 
|  | 731 | if (err < 0) | 
|  | 732 | goto finally; | 
|  | 733 | } | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 734 | return list; | 
|  | 735 | finally: | 
|  | 736 | Py_DECREF(list); | 
|  | 737 | return NULL; | 
|  | 738 | } | 
|  | 739 |  | 
|  | 740 |  | 
|  | 741 | static char split__doc__[] = | 
|  | 742 | "S.split([sep [,maxsplit]]) -> list of strings\n\ | 
|  | 743 | \n\ | 
|  | 744 | Return a list of the words in the string S, using sep as the\n\ | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 745 | delimiter string.  If maxsplit is given, at most maxsplit\n\ | 
|  | 746 | splits are done. If sep is not specified, any whitespace string\n\ | 
|  | 747 | is a separator."; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 748 |  | 
|  | 749 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 750 | string_split(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 751 | { | 
|  | 752 | int len = PyString_GET_SIZE(self), n, i, j, err; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 753 | int maxsplit = -1; | 
|  | 754 | const char *s = PyString_AS_STRING(self), *sub; | 
|  | 755 | PyObject *list, *item, *subobj = Py_None; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 756 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 757 | if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 758 | return NULL; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 759 | if (maxsplit < 0) | 
|  | 760 | maxsplit = INT_MAX; | 
|  | 761 | if (subobj == Py_None) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 762 | return split_whitespace(s, len, maxsplit); | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 763 | if (PyString_Check(subobj)) { | 
|  | 764 | sub = PyString_AS_STRING(subobj); | 
|  | 765 | n = PyString_GET_SIZE(subobj); | 
|  | 766 | } | 
|  | 767 | else if (PyUnicode_Check(subobj)) | 
|  | 768 | return PyUnicode_Split((PyObject *)self, subobj, maxsplit); | 
|  | 769 | else if (PyObject_AsCharBuffer(subobj, &sub, &n)) | 
|  | 770 | return NULL; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 771 | if (n == 0) { | 
|  | 772 | PyErr_SetString(PyExc_ValueError, "empty separator"); | 
|  | 773 | return NULL; | 
|  | 774 | } | 
|  | 775 |  | 
|  | 776 | list = PyList_New(0); | 
|  | 777 | if (list == NULL) | 
|  | 778 | return NULL; | 
|  | 779 |  | 
|  | 780 | i = j = 0; | 
|  | 781 | while (i+n <= len) { | 
| Fred Drake | 396f6e0 | 2000-06-20 15:47:54 +0000 | [diff] [blame] | 782 | if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 783 | if (maxsplit-- <= 0) | 
|  | 784 | break; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 785 | item = PyString_FromStringAndSize(s+j, (int)(i-j)); | 
|  | 786 | if (item == NULL) | 
|  | 787 | goto fail; | 
|  | 788 | err = PyList_Append(list, item); | 
|  | 789 | Py_DECREF(item); | 
|  | 790 | if (err < 0) | 
|  | 791 | goto fail; | 
|  | 792 | i = j = i + n; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 793 | } | 
|  | 794 | else | 
|  | 795 | i++; | 
|  | 796 | } | 
|  | 797 | item = PyString_FromStringAndSize(s+j, (int)(len-j)); | 
|  | 798 | if (item == NULL) | 
|  | 799 | goto fail; | 
|  | 800 | err = PyList_Append(list, item); | 
|  | 801 | Py_DECREF(item); | 
|  | 802 | if (err < 0) | 
|  | 803 | goto fail; | 
|  | 804 |  | 
|  | 805 | return list; | 
|  | 806 |  | 
|  | 807 | fail: | 
|  | 808 | Py_DECREF(list); | 
|  | 809 | return NULL; | 
|  | 810 | } | 
|  | 811 |  | 
|  | 812 |  | 
|  | 813 | static char join__doc__[] = | 
|  | 814 | "S.join(sequence) -> string\n\ | 
|  | 815 | \n\ | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 816 | Return a string which is the concatenation of the strings in the\n\ | 
|  | 817 | sequence.  The separator between elements is S."; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 818 |  | 
|  | 819 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 820 | string_join(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 821 | { | 
|  | 822 | char *sep = PyString_AS_STRING(self); | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 823 | const int seplen = PyString_GET_SIZE(self); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 824 | PyObject *res = NULL; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 825 | char *p; | 
|  | 826 | int seqlen = 0; | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 827 | size_t sz = 0; | 
|  | 828 | int i; | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 829 | PyObject *orig, *seq, *item; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 830 |  | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 831 | if (!PyArg_ParseTuple(args, "O:join", &orig)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 832 | return NULL; | 
|  | 833 |  | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 834 | seq = PySequence_Fast(orig, ""); | 
|  | 835 | if (seq == NULL) { | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 836 | if (PyErr_ExceptionMatches(PyExc_TypeError)) | 
|  | 837 | PyErr_Format(PyExc_TypeError, | 
|  | 838 | "sequence expected, %.80s found", | 
|  | 839 | orig->ob_type->tp_name); | 
|  | 840 | return NULL; | 
|  | 841 | } | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 842 |  | 
| Jeremy Hylton | 03657cf | 2000-07-12 13:05:33 +0000 | [diff] [blame] | 843 | seqlen = PySequence_Size(seq); | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 844 | if (seqlen == 0) { | 
|  | 845 | Py_DECREF(seq); | 
|  | 846 | return PyString_FromString(""); | 
|  | 847 | } | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 848 | if (seqlen == 1) { | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 849 | item = PySequence_Fast_GET_ITEM(seq, 0); | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 850 | if (!PyString_Check(item) && !PyUnicode_Check(item)) { | 
|  | 851 | PyErr_Format(PyExc_TypeError, | 
|  | 852 | "sequence item 0: expected string," | 
|  | 853 | " %.80s found", | 
|  | 854 | item->ob_type->tp_name); | 
|  | 855 | Py_DECREF(seq); | 
|  | 856 | return NULL; | 
|  | 857 | } | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 858 | Py_INCREF(item); | 
| Barry Warsaw | 771d067 | 2000-07-11 04:58:12 +0000 | [diff] [blame] | 859 | Py_DECREF(seq); | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 860 | return item; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 861 | } | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 862 |  | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 863 | /* There are at least two things to join.  Do a pre-pass to figure out | 
|  | 864 | * the total amount of space we'll need (sz), see whether any argument | 
|  | 865 | * is absurd, and defer to the Unicode join if appropriate. | 
|  | 866 | */ | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 867 | for (i = 0; i < seqlen; i++) { | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 868 | const size_t old_sz = sz; | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 869 | item = PySequence_Fast_GET_ITEM(seq, i); | 
|  | 870 | if (!PyString_Check(item)){ | 
|  | 871 | if (PyUnicode_Check(item)) { | 
| Tim Peters | 2cfe368 | 2001-05-05 05:36:48 +0000 | [diff] [blame] | 872 | /* Defer to Unicode join. | 
|  | 873 | * CAUTION:  There's no gurantee that the | 
|  | 874 | * original sequence can be iterated over | 
|  | 875 | * again, so we must pass seq here. | 
|  | 876 | */ | 
|  | 877 | PyObject *result; | 
|  | 878 | result = PyUnicode_Join((PyObject *)self, seq); | 
| Barry Warsaw | 771d067 | 2000-07-11 04:58:12 +0000 | [diff] [blame] | 879 | Py_DECREF(seq); | 
| Tim Peters | 2cfe368 | 2001-05-05 05:36:48 +0000 | [diff] [blame] | 880 | return result; | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 881 | } | 
|  | 882 | PyErr_Format(PyExc_TypeError, | 
| Jeremy Hylton | 88887aa | 2000-07-11 20:55:38 +0000 | [diff] [blame] | 883 | "sequence item %i: expected string," | 
|  | 884 | " %.80s found", | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 885 | i, item->ob_type->tp_name); | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 886 | Py_DECREF(seq); | 
|  | 887 | return NULL; | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 888 | } | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 889 | sz += PyString_GET_SIZE(item); | 
|  | 890 | if (i != 0) | 
|  | 891 | sz += seplen; | 
|  | 892 | if (sz < old_sz || sz > INT_MAX) { | 
|  | 893 | PyErr_SetString(PyExc_OverflowError, | 
|  | 894 | "join() is too long for a Python string"); | 
|  | 895 | Py_DECREF(seq); | 
|  | 896 | return NULL; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 897 | } | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 898 | } | 
|  | 899 |  | 
|  | 900 | /* Allocate result space. */ | 
|  | 901 | res = PyString_FromStringAndSize((char*)NULL, (int)sz); | 
|  | 902 | if (res == NULL) { | 
|  | 903 | Py_DECREF(seq); | 
|  | 904 | return NULL; | 
|  | 905 | } | 
|  | 906 |  | 
|  | 907 | /* Catenate everything. */ | 
|  | 908 | p = PyString_AS_STRING(res); | 
|  | 909 | for (i = 0; i < seqlen; ++i) { | 
|  | 910 | size_t n; | 
|  | 911 | item = PySequence_Fast_GET_ITEM(seq, i); | 
|  | 912 | n = PyString_GET_SIZE(item); | 
|  | 913 | memcpy(p, PyString_AS_STRING(item), n); | 
|  | 914 | p += n; | 
|  | 915 | if (i < seqlen - 1) { | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 916 | memcpy(p, sep, seplen); | 
|  | 917 | p += seplen; | 
| Jeremy Hylton | 194e43e | 2000-07-10 21:30:28 +0000 | [diff] [blame] | 918 | } | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 919 | } | 
| Tim Peters | 19fe14e | 2001-01-19 03:03:47 +0000 | [diff] [blame] | 920 |  | 
| Jeremy Hylton | 4904829 | 2000-07-11 03:28:17 +0000 | [diff] [blame] | 921 | Py_DECREF(seq); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 922 | return res; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 923 | } | 
|  | 924 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 925 | static long | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 926 | string_find_internal(PyStringObject *self, PyObject *args, int dir) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 927 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 928 | const char *s = PyString_AS_STRING(self), *sub; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 929 | int len = PyString_GET_SIZE(self); | 
|  | 930 | int n, i = 0, last = INT_MAX; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 931 | PyObject *subobj; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 932 |  | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 933 | if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", | 
| Guido van Rossum | c682140 | 2000-05-08 14:08:05 +0000 | [diff] [blame] | 934 | &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last)) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 935 | return -2; | 
|  | 936 | if (PyString_Check(subobj)) { | 
|  | 937 | sub = PyString_AS_STRING(subobj); | 
|  | 938 | n = PyString_GET_SIZE(subobj); | 
|  | 939 | } | 
|  | 940 | else if (PyUnicode_Check(subobj)) | 
|  | 941 | return PyUnicode_Find((PyObject *)self, subobj, i, last, 1); | 
|  | 942 | else if (PyObject_AsCharBuffer(subobj, &sub, &n)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 943 | return -2; | 
|  | 944 |  | 
|  | 945 | if (last > len) | 
|  | 946 | last = len; | 
|  | 947 | if (last < 0) | 
|  | 948 | last += len; | 
|  | 949 | if (last < 0) | 
|  | 950 | last = 0; | 
|  | 951 | if (i < 0) | 
|  | 952 | i += len; | 
|  | 953 | if (i < 0) | 
|  | 954 | i = 0; | 
|  | 955 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 956 | if (dir > 0) { | 
|  | 957 | if (n == 0 && i <= last) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 958 | return (long)i; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 959 | last -= n; | 
|  | 960 | for (; i <= last; ++i) | 
| Fred Drake | 396f6e0 | 2000-06-20 15:47:54 +0000 | [diff] [blame] | 961 | if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 962 | return (long)i; | 
|  | 963 | } | 
|  | 964 | else { | 
|  | 965 | int j; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 966 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 967 | if (n == 0 && i <= last) | 
|  | 968 | return (long)last; | 
|  | 969 | for (j = last-n; j >= i; --j) | 
| Fred Drake | 396f6e0 | 2000-06-20 15:47:54 +0000 | [diff] [blame] | 970 | if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 971 | return (long)j; | 
|  | 972 | } | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 973 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 974 | return -1; | 
|  | 975 | } | 
|  | 976 |  | 
|  | 977 |  | 
|  | 978 | static char find__doc__[] = | 
|  | 979 | "S.find(sub [,start [,end]]) -> int\n\ | 
|  | 980 | \n\ | 
|  | 981 | Return the lowest index in S where substring sub is found,\n\ | 
|  | 982 | such that sub is contained within s[start,end].  Optional\n\ | 
|  | 983 | arguments start and end are interpreted as in slice notation.\n\ | 
|  | 984 | \n\ | 
|  | 985 | Return -1 on failure."; | 
|  | 986 |  | 
|  | 987 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 988 | string_find(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 989 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 990 | long result = string_find_internal(self, args, +1); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 991 | if (result == -2) | 
|  | 992 | return NULL; | 
|  | 993 | return PyInt_FromLong(result); | 
|  | 994 | } | 
|  | 995 |  | 
|  | 996 |  | 
|  | 997 | static char index__doc__[] = | 
|  | 998 | "S.index(sub [,start [,end]]) -> int\n\ | 
|  | 999 | \n\ | 
|  | 1000 | Like S.find() but raise ValueError when the substring is not found."; | 
|  | 1001 |  | 
|  | 1002 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1003 | string_index(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1004 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1005 | long result = string_find_internal(self, args, +1); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1006 | if (result == -2) | 
|  | 1007 | return NULL; | 
|  | 1008 | if (result == -1) { | 
|  | 1009 | PyErr_SetString(PyExc_ValueError, | 
|  | 1010 | "substring not found in string.index"); | 
|  | 1011 | return NULL; | 
|  | 1012 | } | 
|  | 1013 | return PyInt_FromLong(result); | 
|  | 1014 | } | 
|  | 1015 |  | 
|  | 1016 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1017 | static char rfind__doc__[] = | 
|  | 1018 | "S.rfind(sub [,start [,end]]) -> int\n\ | 
|  | 1019 | \n\ | 
|  | 1020 | Return the highest index in S where substring sub is found,\n\ | 
|  | 1021 | such that sub is contained within s[start,end].  Optional\n\ | 
|  | 1022 | arguments start and end are interpreted as in slice notation.\n\ | 
|  | 1023 | \n\ | 
|  | 1024 | Return -1 on failure."; | 
|  | 1025 |  | 
|  | 1026 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1027 | string_rfind(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1028 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1029 | long result = string_find_internal(self, args, -1); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1030 | if (result == -2) | 
|  | 1031 | return NULL; | 
|  | 1032 | return PyInt_FromLong(result); | 
|  | 1033 | } | 
|  | 1034 |  | 
|  | 1035 |  | 
|  | 1036 | static char rindex__doc__[] = | 
|  | 1037 | "S.rindex(sub [,start [,end]]) -> int\n\ | 
|  | 1038 | \n\ | 
|  | 1039 | Like S.rfind() but raise ValueError when the substring is not found."; | 
|  | 1040 |  | 
|  | 1041 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1042 | string_rindex(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1043 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1044 | long result = string_find_internal(self, args, -1); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1045 | if (result == -2) | 
|  | 1046 | return NULL; | 
|  | 1047 | if (result == -1) { | 
|  | 1048 | PyErr_SetString(PyExc_ValueError, | 
|  | 1049 | "substring not found in string.rindex"); | 
|  | 1050 | return NULL; | 
|  | 1051 | } | 
|  | 1052 | return PyInt_FromLong(result); | 
|  | 1053 | } | 
|  | 1054 |  | 
|  | 1055 |  | 
|  | 1056 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1057 | do_strip(PyStringObject *self, PyObject *args, int striptype) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1058 | { | 
|  | 1059 | char *s = PyString_AS_STRING(self); | 
|  | 1060 | int len = PyString_GET_SIZE(self), i, j; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1061 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 1062 | if (!PyArg_ParseTuple(args, ":strip")) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1063 | return NULL; | 
|  | 1064 |  | 
|  | 1065 | i = 0; | 
|  | 1066 | if (striptype != RIGHTSTRIP) { | 
|  | 1067 | while (i < len && isspace(Py_CHARMASK(s[i]))) { | 
|  | 1068 | i++; | 
|  | 1069 | } | 
|  | 1070 | } | 
|  | 1071 |  | 
|  | 1072 | j = len; | 
|  | 1073 | if (striptype != LEFTSTRIP) { | 
|  | 1074 | do { | 
|  | 1075 | j--; | 
|  | 1076 | } while (j >= i && isspace(Py_CHARMASK(s[j]))); | 
|  | 1077 | j++; | 
|  | 1078 | } | 
|  | 1079 |  | 
|  | 1080 | if (i == 0 && j == len) { | 
|  | 1081 | Py_INCREF(self); | 
|  | 1082 | return (PyObject*)self; | 
|  | 1083 | } | 
|  | 1084 | else | 
|  | 1085 | return PyString_FromStringAndSize(s+i, j-i); | 
|  | 1086 | } | 
|  | 1087 |  | 
|  | 1088 |  | 
|  | 1089 | static char strip__doc__[] = | 
|  | 1090 | "S.strip() -> string\n\ | 
|  | 1091 | \n\ | 
|  | 1092 | Return a copy of the string S with leading and trailing\n\ | 
|  | 1093 | whitespace removed."; | 
|  | 1094 |  | 
|  | 1095 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1096 | string_strip(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1097 | { | 
|  | 1098 | return do_strip(self, args, BOTHSTRIP); | 
|  | 1099 | } | 
|  | 1100 |  | 
|  | 1101 |  | 
|  | 1102 | static char lstrip__doc__[] = | 
|  | 1103 | "S.lstrip() -> string\n\ | 
|  | 1104 | \n\ | 
|  | 1105 | Return a copy of the string S with leading whitespace removed."; | 
|  | 1106 |  | 
|  | 1107 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1108 | string_lstrip(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1109 | { | 
|  | 1110 | return do_strip(self, args, LEFTSTRIP); | 
|  | 1111 | } | 
|  | 1112 |  | 
|  | 1113 |  | 
|  | 1114 | static char rstrip__doc__[] = | 
|  | 1115 | "S.rstrip() -> string\n\ | 
|  | 1116 | \n\ | 
|  | 1117 | Return a copy of the string S with trailing whitespace removed."; | 
|  | 1118 |  | 
|  | 1119 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1120 | string_rstrip(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1121 | { | 
|  | 1122 | return do_strip(self, args, RIGHTSTRIP); | 
|  | 1123 | } | 
|  | 1124 |  | 
|  | 1125 |  | 
|  | 1126 | static char lower__doc__[] = | 
|  | 1127 | "S.lower() -> string\n\ | 
|  | 1128 | \n\ | 
|  | 1129 | Return a copy of the string S converted to lowercase."; | 
|  | 1130 |  | 
|  | 1131 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1132 | string_lower(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1133 | { | 
|  | 1134 | char *s = PyString_AS_STRING(self), *s_new; | 
|  | 1135 | int i, n = PyString_GET_SIZE(self); | 
|  | 1136 | PyObject *new; | 
|  | 1137 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 1138 | if (!PyArg_ParseTuple(args, ":lower")) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1139 | return NULL; | 
|  | 1140 | new = PyString_FromStringAndSize(NULL, n); | 
|  | 1141 | if (new == NULL) | 
|  | 1142 | return NULL; | 
|  | 1143 | s_new = PyString_AsString(new); | 
|  | 1144 | for (i = 0; i < n; i++) { | 
|  | 1145 | int c = Py_CHARMASK(*s++); | 
|  | 1146 | if (isupper(c)) { | 
|  | 1147 | *s_new = tolower(c); | 
|  | 1148 | } else | 
|  | 1149 | *s_new = c; | 
|  | 1150 | s_new++; | 
|  | 1151 | } | 
|  | 1152 | return new; | 
|  | 1153 | } | 
|  | 1154 |  | 
|  | 1155 |  | 
|  | 1156 | static char upper__doc__[] = | 
|  | 1157 | "S.upper() -> string\n\ | 
|  | 1158 | \n\ | 
|  | 1159 | Return a copy of the string S converted to uppercase."; | 
|  | 1160 |  | 
|  | 1161 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1162 | string_upper(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1163 | { | 
|  | 1164 | char *s = PyString_AS_STRING(self), *s_new; | 
|  | 1165 | int i, n = PyString_GET_SIZE(self); | 
|  | 1166 | PyObject *new; | 
|  | 1167 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 1168 | if (!PyArg_ParseTuple(args, ":upper")) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1169 | return NULL; | 
|  | 1170 | new = PyString_FromStringAndSize(NULL, n); | 
|  | 1171 | if (new == NULL) | 
|  | 1172 | return NULL; | 
|  | 1173 | s_new = PyString_AsString(new); | 
|  | 1174 | for (i = 0; i < n; i++) { | 
|  | 1175 | int c = Py_CHARMASK(*s++); | 
|  | 1176 | if (islower(c)) { | 
|  | 1177 | *s_new = toupper(c); | 
|  | 1178 | } else | 
|  | 1179 | *s_new = c; | 
|  | 1180 | s_new++; | 
|  | 1181 | } | 
|  | 1182 | return new; | 
|  | 1183 | } | 
|  | 1184 |  | 
|  | 1185 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1186 | static char title__doc__[] = | 
|  | 1187 | "S.title() -> string\n\ | 
|  | 1188 | \n\ | 
|  | 1189 | Return a titlecased version of S, i.e. words start with uppercase\n\ | 
|  | 1190 | characters, all remaining cased characters have lowercase."; | 
|  | 1191 |  | 
|  | 1192 | static PyObject* | 
| Fred Drake | 49312a5 | 2000-12-06 14:27:49 +0000 | [diff] [blame] | 1193 | string_title(PyStringObject *self, PyObject *args) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1194 | { | 
|  | 1195 | char *s = PyString_AS_STRING(self), *s_new; | 
|  | 1196 | int i, n = PyString_GET_SIZE(self); | 
|  | 1197 | int previous_is_cased = 0; | 
|  | 1198 | PyObject *new; | 
|  | 1199 |  | 
|  | 1200 | if (!PyArg_ParseTuple(args, ":title")) | 
|  | 1201 | return NULL; | 
|  | 1202 | new = PyString_FromStringAndSize(NULL, n); | 
|  | 1203 | if (new == NULL) | 
|  | 1204 | return NULL; | 
|  | 1205 | s_new = PyString_AsString(new); | 
|  | 1206 | for (i = 0; i < n; i++) { | 
|  | 1207 | int c = Py_CHARMASK(*s++); | 
|  | 1208 | if (islower(c)) { | 
|  | 1209 | if (!previous_is_cased) | 
|  | 1210 | c = toupper(c); | 
|  | 1211 | previous_is_cased = 1; | 
|  | 1212 | } else if (isupper(c)) { | 
|  | 1213 | if (previous_is_cased) | 
|  | 1214 | c = tolower(c); | 
|  | 1215 | previous_is_cased = 1; | 
|  | 1216 | } else | 
|  | 1217 | previous_is_cased = 0; | 
|  | 1218 | *s_new++ = c; | 
|  | 1219 | } | 
|  | 1220 | return new; | 
|  | 1221 | } | 
|  | 1222 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1223 | static char capitalize__doc__[] = | 
|  | 1224 | "S.capitalize() -> string\n\ | 
|  | 1225 | \n\ | 
|  | 1226 | Return a copy of the string S with only its first character\n\ | 
|  | 1227 | capitalized."; | 
|  | 1228 |  | 
|  | 1229 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1230 | string_capitalize(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1231 | { | 
|  | 1232 | char *s = PyString_AS_STRING(self), *s_new; | 
|  | 1233 | int i, n = PyString_GET_SIZE(self); | 
|  | 1234 | PyObject *new; | 
|  | 1235 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 1236 | if (!PyArg_ParseTuple(args, ":capitalize")) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1237 | return NULL; | 
|  | 1238 | new = PyString_FromStringAndSize(NULL, n); | 
|  | 1239 | if (new == NULL) | 
|  | 1240 | return NULL; | 
|  | 1241 | s_new = PyString_AsString(new); | 
|  | 1242 | if (0 < n) { | 
|  | 1243 | int c = Py_CHARMASK(*s++); | 
|  | 1244 | if (islower(c)) | 
|  | 1245 | *s_new = toupper(c); | 
|  | 1246 | else | 
|  | 1247 | *s_new = c; | 
|  | 1248 | s_new++; | 
|  | 1249 | } | 
|  | 1250 | for (i = 1; i < n; i++) { | 
|  | 1251 | int c = Py_CHARMASK(*s++); | 
|  | 1252 | if (isupper(c)) | 
|  | 1253 | *s_new = tolower(c); | 
|  | 1254 | else | 
|  | 1255 | *s_new = c; | 
|  | 1256 | s_new++; | 
|  | 1257 | } | 
|  | 1258 | return new; | 
|  | 1259 | } | 
|  | 1260 |  | 
|  | 1261 |  | 
|  | 1262 | static char count__doc__[] = | 
|  | 1263 | "S.count(sub[, start[, end]]) -> int\n\ | 
|  | 1264 | \n\ | 
|  | 1265 | Return the number of occurrences of substring sub in string\n\ | 
|  | 1266 | S[start:end].  Optional arguments start and end are\n\ | 
|  | 1267 | interpreted as in slice notation."; | 
|  | 1268 |  | 
|  | 1269 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1270 | string_count(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1271 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1272 | const char *s = PyString_AS_STRING(self), *sub; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1273 | int len = PyString_GET_SIZE(self), n; | 
|  | 1274 | int i = 0, last = INT_MAX; | 
|  | 1275 | int m, r; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1276 | PyObject *subobj; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1277 |  | 
| Guido van Rossum | c682140 | 2000-05-08 14:08:05 +0000 | [diff] [blame] | 1278 | if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj, | 
|  | 1279 | _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1280 | return NULL; | 
| Guido van Rossum | c682140 | 2000-05-08 14:08:05 +0000 | [diff] [blame] | 1281 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1282 | if (PyString_Check(subobj)) { | 
|  | 1283 | sub = PyString_AS_STRING(subobj); | 
|  | 1284 | n = PyString_GET_SIZE(subobj); | 
|  | 1285 | } | 
| Marc-André Lemburg | 3a645e4 | 2001-01-16 11:54:12 +0000 | [diff] [blame] | 1286 | else if (PyUnicode_Check(subobj)) { | 
|  | 1287 | int count; | 
|  | 1288 | count = PyUnicode_Count((PyObject *)self, subobj, i, last); | 
|  | 1289 | if (count == -1) | 
|  | 1290 | return NULL; | 
|  | 1291 | else | 
|  | 1292 | return PyInt_FromLong((long) count); | 
|  | 1293 | } | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1294 | else if (PyObject_AsCharBuffer(subobj, &sub, &n)) | 
|  | 1295 | return NULL; | 
|  | 1296 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1297 | if (last > len) | 
|  | 1298 | last = len; | 
|  | 1299 | if (last < 0) | 
|  | 1300 | last += len; | 
|  | 1301 | if (last < 0) | 
|  | 1302 | last = 0; | 
|  | 1303 | if (i < 0) | 
|  | 1304 | i += len; | 
|  | 1305 | if (i < 0) | 
|  | 1306 | i = 0; | 
|  | 1307 | m = last + 1 - n; | 
|  | 1308 | if (n == 0) | 
|  | 1309 | return PyInt_FromLong((long) (m-i)); | 
|  | 1310 |  | 
|  | 1311 | r = 0; | 
|  | 1312 | while (i < m) { | 
|  | 1313 | if (!memcmp(s+i, sub, n)) { | 
|  | 1314 | r++; | 
|  | 1315 | i += n; | 
|  | 1316 | } else { | 
|  | 1317 | i++; | 
|  | 1318 | } | 
|  | 1319 | } | 
|  | 1320 | return PyInt_FromLong((long) r); | 
|  | 1321 | } | 
|  | 1322 |  | 
|  | 1323 |  | 
|  | 1324 | static char swapcase__doc__[] = | 
|  | 1325 | "S.swapcase() -> string\n\ | 
|  | 1326 | \n\ | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1327 | Return a copy of the string S with uppercase characters\n\ | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1328 | converted to lowercase and vice versa."; | 
|  | 1329 |  | 
|  | 1330 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1331 | string_swapcase(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1332 | { | 
|  | 1333 | char *s = PyString_AS_STRING(self), *s_new; | 
|  | 1334 | int i, n = PyString_GET_SIZE(self); | 
|  | 1335 | PyObject *new; | 
|  | 1336 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 1337 | if (!PyArg_ParseTuple(args, ":swapcase")) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1338 | return NULL; | 
|  | 1339 | new = PyString_FromStringAndSize(NULL, n); | 
|  | 1340 | if (new == NULL) | 
|  | 1341 | return NULL; | 
|  | 1342 | s_new = PyString_AsString(new); | 
|  | 1343 | for (i = 0; i < n; i++) { | 
|  | 1344 | int c = Py_CHARMASK(*s++); | 
|  | 1345 | if (islower(c)) { | 
|  | 1346 | *s_new = toupper(c); | 
|  | 1347 | } | 
|  | 1348 | else if (isupper(c)) { | 
|  | 1349 | *s_new = tolower(c); | 
|  | 1350 | } | 
|  | 1351 | else | 
|  | 1352 | *s_new = c; | 
|  | 1353 | s_new++; | 
|  | 1354 | } | 
|  | 1355 | return new; | 
|  | 1356 | } | 
|  | 1357 |  | 
|  | 1358 |  | 
|  | 1359 | static char translate__doc__[] = | 
|  | 1360 | "S.translate(table [,deletechars]) -> string\n\ | 
|  | 1361 | \n\ | 
|  | 1362 | Return a copy of the string S, where all characters occurring\n\ | 
|  | 1363 | in the optional argument deletechars are removed, and the\n\ | 
|  | 1364 | remaining characters have been mapped through the given\n\ | 
|  | 1365 | translation table, which must be a string of length 256."; | 
|  | 1366 |  | 
|  | 1367 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1368 | string_translate(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1369 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1370 | register char *input, *output; | 
|  | 1371 | register const char *table; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1372 | register int i, c, changed = 0; | 
|  | 1373 | PyObject *input_obj = (PyObject*)self; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1374 | const char *table1, *output_start, *del_table=NULL; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1375 | int inlen, tablen, dellen = 0; | 
|  | 1376 | PyObject *result; | 
|  | 1377 | int trans_table[256]; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1378 | PyObject *tableobj, *delobj = NULL; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1379 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1380 | if (!PyArg_ParseTuple(args, "O|O:translate", | 
|  | 1381 | &tableobj, &delobj)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1382 | return NULL; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1383 |  | 
|  | 1384 | if (PyString_Check(tableobj)) { | 
|  | 1385 | table1 = PyString_AS_STRING(tableobj); | 
|  | 1386 | tablen = PyString_GET_SIZE(tableobj); | 
|  | 1387 | } | 
|  | 1388 | else if (PyUnicode_Check(tableobj)) { | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1389 | /* Unicode .translate() does not support the deletechars | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1390 | parameter; instead a mapping to None will cause characters | 
|  | 1391 | to be deleted. */ | 
|  | 1392 | if (delobj != NULL) { | 
|  | 1393 | PyErr_SetString(PyExc_TypeError, | 
|  | 1394 | "deletions are implemented differently for unicode"); | 
|  | 1395 | return NULL; | 
|  | 1396 | } | 
|  | 1397 | return PyUnicode_Translate((PyObject *)self, tableobj, NULL); | 
|  | 1398 | } | 
|  | 1399 | else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1400 | return NULL; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1401 |  | 
|  | 1402 | if (delobj != NULL) { | 
|  | 1403 | if (PyString_Check(delobj)) { | 
|  | 1404 | del_table = PyString_AS_STRING(delobj); | 
|  | 1405 | dellen = PyString_GET_SIZE(delobj); | 
|  | 1406 | } | 
|  | 1407 | else if (PyUnicode_Check(delobj)) { | 
|  | 1408 | PyErr_SetString(PyExc_TypeError, | 
|  | 1409 | "deletions are implemented differently for unicode"); | 
|  | 1410 | return NULL; | 
|  | 1411 | } | 
|  | 1412 | else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) | 
|  | 1413 | return NULL; | 
|  | 1414 |  | 
|  | 1415 | if (tablen != 256) { | 
|  | 1416 | PyErr_SetString(PyExc_ValueError, | 
|  | 1417 | "translation table must be 256 characters long"); | 
|  | 1418 | return NULL; | 
|  | 1419 | } | 
|  | 1420 | } | 
|  | 1421 | else { | 
|  | 1422 | del_table = NULL; | 
|  | 1423 | dellen = 0; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1424 | } | 
|  | 1425 |  | 
|  | 1426 | table = table1; | 
|  | 1427 | inlen = PyString_Size(input_obj); | 
|  | 1428 | result = PyString_FromStringAndSize((char *)NULL, inlen); | 
|  | 1429 | if (result == NULL) | 
|  | 1430 | return NULL; | 
|  | 1431 | output_start = output = PyString_AsString(result); | 
|  | 1432 | input = PyString_AsString(input_obj); | 
|  | 1433 |  | 
|  | 1434 | if (dellen == 0) { | 
|  | 1435 | /* If no deletions are required, use faster code */ | 
|  | 1436 | for (i = inlen; --i >= 0; ) { | 
|  | 1437 | c = Py_CHARMASK(*input++); | 
|  | 1438 | if (Py_CHARMASK((*output++ = table[c])) != c) | 
|  | 1439 | changed = 1; | 
|  | 1440 | } | 
|  | 1441 | if (changed) | 
|  | 1442 | return result; | 
|  | 1443 | Py_DECREF(result); | 
|  | 1444 | Py_INCREF(input_obj); | 
|  | 1445 | return input_obj; | 
|  | 1446 | } | 
|  | 1447 |  | 
|  | 1448 | for (i = 0; i < 256; i++) | 
|  | 1449 | trans_table[i] = Py_CHARMASK(table[i]); | 
|  | 1450 |  | 
|  | 1451 | for (i = 0; i < dellen; i++) | 
|  | 1452 | trans_table[(int) Py_CHARMASK(del_table[i])] = -1; | 
|  | 1453 |  | 
|  | 1454 | for (i = inlen; --i >= 0; ) { | 
|  | 1455 | c = Py_CHARMASK(*input++); | 
|  | 1456 | if (trans_table[c] != -1) | 
|  | 1457 | if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) | 
|  | 1458 | continue; | 
|  | 1459 | changed = 1; | 
|  | 1460 | } | 
|  | 1461 | if (!changed) { | 
|  | 1462 | Py_DECREF(result); | 
|  | 1463 | Py_INCREF(input_obj); | 
|  | 1464 | return input_obj; | 
|  | 1465 | } | 
|  | 1466 | /* Fix the size of the resulting string */ | 
|  | 1467 | if (inlen > 0 &&_PyString_Resize(&result, output-output_start)) | 
|  | 1468 | return NULL; | 
|  | 1469 | return result; | 
|  | 1470 | } | 
|  | 1471 |  | 
|  | 1472 |  | 
|  | 1473 | /* What follows is used for implementing replace().  Perry Stoll. */ | 
|  | 1474 |  | 
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the
  PAT_LEN bytes at PAT.  Returns the index into MEM of that occurrence,
  or -1 if there is none.  When PAT is longer than MEM the result is
  always -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot start in the last pat_len-1 bytes, so this is
	   the highest index worth trying. */
	const int last_start = len - pat_len;
	const char *cursor = mem;
	int pos;

	for (pos = 0; pos <= last_start; pos++, cursor++) {
		/* cheap first-byte test before the full comparison */
		if (*cursor != pat[0])
			continue;
		if (memcmp(cursor, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
|  | 1500 |  | 
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming just past each match.  For example
  mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also gives 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		int consumed;
		int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* drop everything up to and including this match */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
|  | 1524 |  | 
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  COUNT limits the number of replacements; a
   negative COUNT means "no limit".

   If PAT is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned; the caller must release it
   with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failed, or the result
       would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	/* An empty pattern is refused here as well: mymemcnt() would
	   never advance on a zero-length match. */
	if (len == 0 || pat_len <= 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth only happens when SUB is longer than PAT; make sure
	   the new length is representable before computing it. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* new_len is 0 when the whole input is replaced by nothing.
	   malloc(0) may legitimately return NULL, which the caller would
	   take for an out-of-memory condition, so always ask for at
	   least one byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
|  | 1602 |  | 
|  | 1603 |  | 
|  | 1604 | static char replace__doc__[] = | 
|  | 1605 | "S.replace (old, new[, maxsplit]) -> string\n\ | 
|  | 1606 | \n\ | 
|  | 1607 | Return a copy of string S with all occurrences of substring\n\ | 
|  | 1608 | old replaced by new.  If the optional argument maxsplit is\n\ | 
|  | 1609 | given, only the first maxsplit occurrences are replaced."; | 
|  | 1610 |  | 
|  | 1611 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1612 | string_replace(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1613 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1614 | const char *str = PyString_AS_STRING(self), *sub, *repl; | 
|  | 1615 | char *new_s; | 
|  | 1616 | int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len; | 
|  | 1617 | int count = -1; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1618 | PyObject *new; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1619 | PyObject *subobj, *replobj; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1620 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1621 | if (!PyArg_ParseTuple(args, "OO|i:replace", | 
|  | 1622 | &subobj, &replobj, &count)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1623 | return NULL; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1624 |  | 
|  | 1625 | if (PyString_Check(subobj)) { | 
|  | 1626 | sub = PyString_AS_STRING(subobj); | 
|  | 1627 | sub_len = PyString_GET_SIZE(subobj); | 
|  | 1628 | } | 
|  | 1629 | else if (PyUnicode_Check(subobj)) | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1630 | return PyUnicode_Replace((PyObject *)self, | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1631 | subobj, replobj, count); | 
|  | 1632 | else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) | 
|  | 1633 | return NULL; | 
|  | 1634 |  | 
|  | 1635 | if (PyString_Check(replobj)) { | 
|  | 1636 | repl = PyString_AS_STRING(replobj); | 
|  | 1637 | repl_len = PyString_GET_SIZE(replobj); | 
|  | 1638 | } | 
|  | 1639 | else if (PyUnicode_Check(replobj)) | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1640 | return PyUnicode_Replace((PyObject *)self, | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1641 | subobj, replobj, count); | 
|  | 1642 | else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len)) | 
|  | 1643 | return NULL; | 
|  | 1644 |  | 
| Guido van Rossum | 96a45ad | 2000-03-13 15:56:08 +0000 | [diff] [blame] | 1645 | if (sub_len <= 0) { | 
| Barry Warsaw | 51ac580 | 2000-03-20 16:36:48 +0000 | [diff] [blame] | 1646 | PyErr_SetString(PyExc_ValueError, "empty pattern string"); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1647 | return NULL; | 
|  | 1648 | } | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1649 | new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1650 | if (new_s == NULL) { | 
|  | 1651 | PyErr_NoMemory(); | 
|  | 1652 | return NULL; | 
|  | 1653 | } | 
|  | 1654 | if (out_len == -1) { | 
|  | 1655 | /* we're returning another reference to self */ | 
|  | 1656 | new = (PyObject*)self; | 
|  | 1657 | Py_INCREF(new); | 
|  | 1658 | } | 
|  | 1659 | else { | 
|  | 1660 | new = PyString_FromStringAndSize(new_s, out_len); | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 1661 | PyMem_FREE(new_s); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1662 | } | 
|  | 1663 | return new; | 
|  | 1664 | } | 
|  | 1665 |  | 
|  | 1666 |  | 
|  | 1667 | static char startswith__doc__[] = | 
|  | 1668 | "S.startswith(prefix[, start[, end]]) -> int\n\ | 
|  | 1669 | \n\ | 
|  | 1670 | Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\ | 
|  | 1671 | optional start, test S beginning at that position.  With optional end, stop\n\ | 
|  | 1672 | comparing S at that position."; | 
|  | 1673 |  | 
|  | 1674 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1675 | string_startswith(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1676 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1677 | const char* str = PyString_AS_STRING(self); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1678 | int len = PyString_GET_SIZE(self); | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1679 | const char* prefix; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1680 | int plen; | 
|  | 1681 | int start = 0; | 
|  | 1682 | int end = -1; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1683 | PyObject *subobj; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1684 |  | 
| Guido van Rossum | c682140 | 2000-05-08 14:08:05 +0000 | [diff] [blame] | 1685 | if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj, | 
|  | 1686 | _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1687 | return NULL; | 
|  | 1688 | if (PyString_Check(subobj)) { | 
|  | 1689 | prefix = PyString_AS_STRING(subobj); | 
|  | 1690 | plen = PyString_GET_SIZE(subobj); | 
|  | 1691 | } | 
| Marc-André Lemburg | 3a645e4 | 2001-01-16 11:54:12 +0000 | [diff] [blame] | 1692 | else if (PyUnicode_Check(subobj)) { | 
|  | 1693 | int rc; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1694 | rc = PyUnicode_Tailmatch((PyObject *)self, | 
| Marc-André Lemburg | 3a645e4 | 2001-01-16 11:54:12 +0000 | [diff] [blame] | 1695 | subobj, start, end, -1); | 
|  | 1696 | if (rc == -1) | 
|  | 1697 | return NULL; | 
|  | 1698 | else | 
|  | 1699 | return PyInt_FromLong((long) rc); | 
|  | 1700 | } | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1701 | else if (PyObject_AsCharBuffer(subobj, &prefix, &plen)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1702 | return NULL; | 
|  | 1703 |  | 
|  | 1704 | /* adopt Java semantics for index out of range.  it is legal for | 
|  | 1705 | * offset to be == plen, but this only returns true if prefix is | 
|  | 1706 | * the empty string. | 
|  | 1707 | */ | 
|  | 1708 | if (start < 0 || start+plen > len) | 
|  | 1709 | return PyInt_FromLong(0); | 
|  | 1710 |  | 
|  | 1711 | if (!memcmp(str+start, prefix, plen)) { | 
|  | 1712 | /* did the match end after the specified end? */ | 
|  | 1713 | if (end < 0) | 
|  | 1714 | return PyInt_FromLong(1); | 
|  | 1715 | else if (end - start < plen) | 
|  | 1716 | return PyInt_FromLong(0); | 
|  | 1717 | else | 
|  | 1718 | return PyInt_FromLong(1); | 
|  | 1719 | } | 
|  | 1720 | else return PyInt_FromLong(0); | 
|  | 1721 | } | 
|  | 1722 |  | 
|  | 1723 |  | 
|  | 1724 | static char endswith__doc__[] = | 
|  | 1725 | "S.endswith(suffix[, start[, end]]) -> int\n\ | 
|  | 1726 | \n\ | 
|  | 1727 | Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\ | 
|  | 1728 | optional start, test S beginning at that position.  With optional end, stop\n\ | 
|  | 1729 | comparing S at that position."; | 
|  | 1730 |  | 
|  | 1731 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 1732 | string_endswith(PyStringObject *self, PyObject *args) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1733 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1734 | const char* str = PyString_AS_STRING(self); | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1735 | int len = PyString_GET_SIZE(self); | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1736 | const char* suffix; | 
|  | 1737 | int slen; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1738 | int start = 0; | 
|  | 1739 | int end = -1; | 
|  | 1740 | int lower, upper; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1741 | PyObject *subobj; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1742 |  | 
| Guido van Rossum | c682140 | 2000-05-08 14:08:05 +0000 | [diff] [blame] | 1743 | if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj, | 
|  | 1744 | _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1745 | return NULL; | 
|  | 1746 | if (PyString_Check(subobj)) { | 
|  | 1747 | suffix = PyString_AS_STRING(subobj); | 
|  | 1748 | slen = PyString_GET_SIZE(subobj); | 
|  | 1749 | } | 
| Marc-André Lemburg | 3a645e4 | 2001-01-16 11:54:12 +0000 | [diff] [blame] | 1750 | else if (PyUnicode_Check(subobj)) { | 
|  | 1751 | int rc; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1752 | rc = PyUnicode_Tailmatch((PyObject *)self, | 
| Marc-André Lemburg | 3a645e4 | 2001-01-16 11:54:12 +0000 | [diff] [blame] | 1753 | subobj, start, end, +1); | 
|  | 1754 | if (rc == -1) | 
|  | 1755 | return NULL; | 
|  | 1756 | else | 
|  | 1757 | return PyInt_FromLong((long) rc); | 
|  | 1758 | } | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1759 | else if (PyObject_AsCharBuffer(subobj, &suffix, &slen)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1760 | return NULL; | 
|  | 1761 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1762 | if (start < 0 || start > len || slen > len) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1763 | return PyInt_FromLong(0); | 
|  | 1764 |  | 
|  | 1765 | upper = (end >= 0 && end <= len) ? end : len; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1766 | lower = (upper - slen) > start ? (upper - slen) : start; | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1767 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1768 | if (upper-lower >= slen && !memcmp(str+lower, suffix, slen)) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 1769 | return PyInt_FromLong(1); | 
|  | 1770 | else return PyInt_FromLong(0); | 
|  | 1771 | } | 
|  | 1772 |  | 
|  | 1773 |  | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 1774 | static char encode__doc__[] = | 
|  | 1775 | "S.encode([encoding[,errors]]) -> string\n\ | 
|  | 1776 | \n\ | 
|  | 1777 | Return an encoded string version of S. Default encoding is the current\n\ | 
|  | 1778 | default string encoding. errors may be given to set a different error\n\ | 
|  | 1779 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\ | 
|  | 1780 | a ValueError. Other possible values are 'ignore' and 'replace'."; | 
|  | 1781 |  | 
|  | 1782 | static PyObject * | 
|  | 1783 | string_encode(PyStringObject *self, PyObject *args) | 
|  | 1784 | { | 
|  | 1785 | char *encoding = NULL; | 
|  | 1786 | char *errors = NULL; | 
|  | 1787 | if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) | 
|  | 1788 | return NULL; | 
|  | 1789 | return PyString_AsEncodedString((PyObject *)self, encoding, errors); | 
|  | 1790 | } | 
|  | 1791 |  | 
|  | 1792 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1793 | static char expandtabs__doc__[] = | 
|  | 1794 | "S.expandtabs([tabsize]) -> string\n\ | 
|  | 1795 | \n\ | 
|  | 1796 | Return a copy of S where all tab characters are expanded using spaces.\n\ | 
|  | 1797 | If tabsize is not given, a tab size of 8 characters is assumed."; | 
|  | 1798 |  | 
|  | 1799 | static PyObject* | 
|  | 1800 | string_expandtabs(PyStringObject *self, PyObject *args) | 
|  | 1801 | { | 
|  | 1802 | const char *e, *p; | 
|  | 1803 | char *q; | 
|  | 1804 | int i, j; | 
|  | 1805 | PyObject *u; | 
|  | 1806 | int tabsize = 8; | 
|  | 1807 |  | 
|  | 1808 | if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) | 
|  | 1809 | return NULL; | 
|  | 1810 |  | 
| Thomas Wouters | 7e47402 | 2000-07-16 12:04:32 +0000 | [diff] [blame] | 1811 | /* First pass: determine size of output string */ | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1812 | i = j = 0; | 
|  | 1813 | e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); | 
|  | 1814 | for (p = PyString_AS_STRING(self); p < e; p++) | 
|  | 1815 | if (*p == '\t') { | 
|  | 1816 | if (tabsize > 0) | 
|  | 1817 | j += tabsize - (j % tabsize); | 
|  | 1818 | } | 
|  | 1819 | else { | 
|  | 1820 | j++; | 
|  | 1821 | if (*p == '\n' || *p == '\r') { | 
|  | 1822 | i += j; | 
|  | 1823 | j = 0; | 
|  | 1824 | } | 
|  | 1825 | } | 
|  | 1826 |  | 
|  | 1827 | /* Second pass: create output string and fill it */ | 
|  | 1828 | u = PyString_FromStringAndSize(NULL, i + j); | 
|  | 1829 | if (!u) | 
|  | 1830 | return NULL; | 
|  | 1831 |  | 
|  | 1832 | j = 0; | 
|  | 1833 | q = PyString_AS_STRING(u); | 
|  | 1834 |  | 
|  | 1835 | for (p = PyString_AS_STRING(self); p < e; p++) | 
|  | 1836 | if (*p == '\t') { | 
|  | 1837 | if (tabsize > 0) { | 
|  | 1838 | i = tabsize - (j % tabsize); | 
|  | 1839 | j += i; | 
|  | 1840 | while (i--) | 
|  | 1841 | *q++ = ' '; | 
|  | 1842 | } | 
|  | 1843 | } | 
|  | 1844 | else { | 
|  | 1845 | j++; | 
|  | 1846 | *q++ = *p; | 
|  | 1847 | if (*p == '\n' || *p == '\r') | 
|  | 1848 | j = 0; | 
|  | 1849 | } | 
|  | 1850 |  | 
|  | 1851 | return u; | 
|  | 1852 | } | 
|  | 1853 |  | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1854 | static | 
|  | 1855 | PyObject *pad(PyStringObject *self, | 
|  | 1856 | int left, | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1857 | int right, | 
|  | 1858 | char fill) | 
|  | 1859 | { | 
|  | 1860 | PyObject *u; | 
|  | 1861 |  | 
|  | 1862 | if (left < 0) | 
|  | 1863 | left = 0; | 
|  | 1864 | if (right < 0) | 
|  | 1865 | right = 0; | 
|  | 1866 |  | 
|  | 1867 | if (left == 0 && right == 0) { | 
|  | 1868 | Py_INCREF(self); | 
|  | 1869 | return (PyObject *)self; | 
|  | 1870 | } | 
|  | 1871 |  | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1872 | u = PyString_FromStringAndSize(NULL, | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1873 | left + PyString_GET_SIZE(self) + right); | 
|  | 1874 | if (u) { | 
|  | 1875 | if (left) | 
|  | 1876 | memset(PyString_AS_STRING(u), fill, left); | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 1877 | memcpy(PyString_AS_STRING(u) + left, | 
|  | 1878 | PyString_AS_STRING(self), | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 1879 | PyString_GET_SIZE(self)); | 
|  | 1880 | if (right) | 
|  | 1881 | memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), | 
|  | 1882 | fill, right); | 
|  | 1883 | } | 
|  | 1884 |  | 
|  | 1885 | return u; | 
|  | 1886 | } | 
|  | 1887 |  | 
|  | 1888 | static char ljust__doc__[] = | 
|  | 1889 | "S.ljust(width) -> string\n\ | 
|  | 1890 | \n\ | 
|  | 1891 | Return S left justified in a string of length width. Padding is\n\ | 
|  | 1892 | done using spaces."; | 
|  | 1893 |  | 
|  | 1894 | static PyObject * | 
|  | 1895 | string_ljust(PyStringObject *self, PyObject *args) | 
|  | 1896 | { | 
|  | 1897 | int width; | 
|  | 1898 | if (!PyArg_ParseTuple(args, "i:ljust", &width)) | 
|  | 1899 | return NULL; | 
|  | 1900 |  | 
|  | 1901 | if (PyString_GET_SIZE(self) >= width) { | 
|  | 1902 | Py_INCREF(self); | 
|  | 1903 | return (PyObject*) self; | 
|  | 1904 | } | 
|  | 1905 |  | 
|  | 1906 | return pad(self, 0, width - PyString_GET_SIZE(self), ' '); | 
|  | 1907 | } | 
|  | 1908 |  | 
|  | 1909 |  | 
|  | 1910 | static char rjust__doc__[] = | 
|  | 1911 | "S.rjust(width) -> string\n\ | 
|  | 1912 | \n\ | 
|  | 1913 | Return S right justified in a string of length width. Padding is\n\ | 
|  | 1914 | done using spaces."; | 
|  | 1915 |  | 
|  | 1916 | static PyObject * | 
|  | 1917 | string_rjust(PyStringObject *self, PyObject *args) | 
|  | 1918 | { | 
|  | 1919 | int width; | 
|  | 1920 | if (!PyArg_ParseTuple(args, "i:rjust", &width)) | 
|  | 1921 | return NULL; | 
|  | 1922 |  | 
|  | 1923 | if (PyString_GET_SIZE(self) >= width) { | 
|  | 1924 | Py_INCREF(self); | 
|  | 1925 | return (PyObject*) self; | 
|  | 1926 | } | 
|  | 1927 |  | 
|  | 1928 | return pad(self, width - PyString_GET_SIZE(self), 0, ' '); | 
|  | 1929 | } | 
|  | 1930 |  | 
|  | 1931 |  | 
|  | 1932 | static char center__doc__[] = | 
|  | 1933 | "S.center(width) -> string\n\ | 
|  | 1934 | \n\ | 
|  | 1935 | Return S centered in a string of length width. Padding is done\n\ | 
|  | 1936 | using spaces."; | 
|  | 1937 |  | 
|  | 1938 | static PyObject * | 
|  | 1939 | string_center(PyStringObject *self, PyObject *args) | 
|  | 1940 | { | 
|  | 1941 | int marg, left; | 
|  | 1942 | int width; | 
|  | 1943 |  | 
|  | 1944 | if (!PyArg_ParseTuple(args, "i:center", &width)) | 
|  | 1945 | return NULL; | 
|  | 1946 |  | 
|  | 1947 | if (PyString_GET_SIZE(self) >= width) { | 
|  | 1948 | Py_INCREF(self); | 
|  | 1949 | return (PyObject*) self; | 
|  | 1950 | } | 
|  | 1951 |  | 
|  | 1952 | marg = width - PyString_GET_SIZE(self); | 
|  | 1953 | left = marg / 2 + (marg & width & 1); | 
|  | 1954 |  | 
|  | 1955 | return pad(self, left, marg - left, ' '); | 
|  | 1956 | } | 
|  | 1957 |  | 
#if 0
static char zfill__doc__[] =
	"S.zfill(width) -> string\n"
	"\n"
	"Pad a numeric string x with zeros on the left, to fill a field\n"
	"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads with '0' up to `width`; a leading '+' or '-' of the original
 * string is moved in front of the inserted zeros.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int npad;
	PyObject *padded;
	char *buf;
	char first;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject *)self;
	}

	npad = width - PyString_GET_SIZE(self);

	padded = pad(self, npad, 0, '0');
	if (!padded)
		return NULL;

	buf = PyString_AS_STRING(padded);
	/* buf[npad] is the first character of the original string */
	first = buf[npad];
	if (first == '+' || first == '-') {
		/* move sign to beginning of string */
		buf[0] = first;
		buf[npad] = '0';
	}

	return padded;
}
#endif
|  | 1997 |  | 
|  | 1998 | static char isspace__doc__[] = | 
|  | 1999 | "S.isspace() -> int\n\ | 
|  | 2000 | \n\ | 
|  | 2001 | Return 1 if there are only whitespace characters in S,\n\ | 
|  | 2002 | 0 otherwise."; | 
|  | 2003 |  | 
|  | 2004 | static PyObject* | 
|  | 2005 | string_isspace(PyStringObject *self, PyObject *args) | 
|  | 2006 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2007 | register const unsigned char *p | 
|  | 2008 | = (unsigned char *) PyString_AS_STRING(self); | 
| Guido van Rossum | b8f820c | 2000-05-05 20:44:24 +0000 | [diff] [blame] | 2009 | register const unsigned char *e; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2010 |  | 
|  | 2011 | if (!PyArg_NoArgs(args)) | 
|  | 2012 | return NULL; | 
|  | 2013 |  | 
|  | 2014 | /* Shortcut for single character strings */ | 
|  | 2015 | if (PyString_GET_SIZE(self) == 1 && | 
|  | 2016 | isspace(*p)) | 
|  | 2017 | return PyInt_FromLong(1); | 
|  | 2018 |  | 
| Marc-André Lemburg | 60bc809 | 2000-06-14 09:18:32 +0000 | [diff] [blame] | 2019 | /* Special case for empty strings */ | 
|  | 2020 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2021 | return PyInt_FromLong(0); | 
|  | 2022 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2023 | e = p + PyString_GET_SIZE(self); | 
|  | 2024 | for (; p < e; p++) { | 
|  | 2025 | if (!isspace(*p)) | 
|  | 2026 | return PyInt_FromLong(0); | 
|  | 2027 | } | 
|  | 2028 | return PyInt_FromLong(1); | 
|  | 2029 | } | 
|  | 2030 |  | 
|  | 2031 |  | 
| Marc-André Lemburg | 4027f8f | 2000-07-05 09:47:46 +0000 | [diff] [blame] | 2032 | static char isalpha__doc__[] = | 
|  | 2033 | "S.isalpha() -> int\n\ | 
|  | 2034 | \n\ | 
|  | 2035 | Return 1 if  all characters in S are alphabetic\n\ | 
|  | 2036 | and there is at least one character in S, 0 otherwise."; | 
|  | 2037 |  | 
|  | 2038 | static PyObject* | 
| Fred Drake | 49312a5 | 2000-12-06 14:27:49 +0000 | [diff] [blame] | 2039 | string_isalpha(PyStringObject *self, PyObject *args) | 
| Marc-André Lemburg | 4027f8f | 2000-07-05 09:47:46 +0000 | [diff] [blame] | 2040 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2041 | register const unsigned char *p | 
|  | 2042 | = (unsigned char *) PyString_AS_STRING(self); | 
| Marc-André Lemburg | 4027f8f | 2000-07-05 09:47:46 +0000 | [diff] [blame] | 2043 | register const unsigned char *e; | 
|  | 2044 |  | 
|  | 2045 | if (!PyArg_NoArgs(args)) | 
|  | 2046 | return NULL; | 
|  | 2047 |  | 
|  | 2048 | /* Shortcut for single character strings */ | 
|  | 2049 | if (PyString_GET_SIZE(self) == 1 && | 
|  | 2050 | isalpha(*p)) | 
|  | 2051 | return PyInt_FromLong(1); | 
|  | 2052 |  | 
|  | 2053 | /* Special case for empty strings */ | 
|  | 2054 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2055 | return PyInt_FromLong(0); | 
|  | 2056 |  | 
|  | 2057 | e = p + PyString_GET_SIZE(self); | 
|  | 2058 | for (; p < e; p++) { | 
|  | 2059 | if (!isalpha(*p)) | 
|  | 2060 | return PyInt_FromLong(0); | 
|  | 2061 | } | 
|  | 2062 | return PyInt_FromLong(1); | 
|  | 2063 | } | 
|  | 2064 |  | 
|  | 2065 |  | 
|  | 2066 | static char isalnum__doc__[] = | 
|  | 2067 | "S.isalnum() -> int\n\ | 
|  | 2068 | \n\ | 
|  | 2069 | Return 1 if  all characters in S are alphanumeric\n\ | 
|  | 2070 | and there is at least one character in S, 0 otherwise."; | 
|  | 2071 |  | 
|  | 2072 | static PyObject* | 
| Fred Drake | 49312a5 | 2000-12-06 14:27:49 +0000 | [diff] [blame] | 2073 | string_isalnum(PyStringObject *self, PyObject *args) | 
| Marc-André Lemburg | 4027f8f | 2000-07-05 09:47:46 +0000 | [diff] [blame] | 2074 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2075 | register const unsigned char *p | 
|  | 2076 | = (unsigned char *) PyString_AS_STRING(self); | 
| Marc-André Lemburg | 4027f8f | 2000-07-05 09:47:46 +0000 | [diff] [blame] | 2077 | register const unsigned char *e; | 
|  | 2078 |  | 
|  | 2079 | if (!PyArg_NoArgs(args)) | 
|  | 2080 | return NULL; | 
|  | 2081 |  | 
|  | 2082 | /* Shortcut for single character strings */ | 
|  | 2083 | if (PyString_GET_SIZE(self) == 1 && | 
|  | 2084 | isalnum(*p)) | 
|  | 2085 | return PyInt_FromLong(1); | 
|  | 2086 |  | 
|  | 2087 | /* Special case for empty strings */ | 
|  | 2088 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2089 | return PyInt_FromLong(0); | 
|  | 2090 |  | 
|  | 2091 | e = p + PyString_GET_SIZE(self); | 
|  | 2092 | for (; p < e; p++) { | 
|  | 2093 | if (!isalnum(*p)) | 
|  | 2094 | return PyInt_FromLong(0); | 
|  | 2095 | } | 
|  | 2096 | return PyInt_FromLong(1); | 
|  | 2097 | } | 
|  | 2098 |  | 
|  | 2099 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2100 | static char isdigit__doc__[] = | 
|  | 2101 | "S.isdigit() -> int\n\ | 
|  | 2102 | \n\ | 
|  | 2103 | Return 1 if there are only digit characters in S,\n\ | 
|  | 2104 | 0 otherwise."; | 
|  | 2105 |  | 
|  | 2106 | static PyObject* | 
|  | 2107 | string_isdigit(PyStringObject *self, PyObject *args) | 
|  | 2108 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2109 | register const unsigned char *p | 
|  | 2110 | = (unsigned char *) PyString_AS_STRING(self); | 
| Guido van Rossum | b8f820c | 2000-05-05 20:44:24 +0000 | [diff] [blame] | 2111 | register const unsigned char *e; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2112 |  | 
|  | 2113 | if (!PyArg_NoArgs(args)) | 
|  | 2114 | return NULL; | 
|  | 2115 |  | 
|  | 2116 | /* Shortcut for single character strings */ | 
|  | 2117 | if (PyString_GET_SIZE(self) == 1 && | 
|  | 2118 | isdigit(*p)) | 
|  | 2119 | return PyInt_FromLong(1); | 
|  | 2120 |  | 
| Marc-André Lemburg | 60bc809 | 2000-06-14 09:18:32 +0000 | [diff] [blame] | 2121 | /* Special case for empty strings */ | 
|  | 2122 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2123 | return PyInt_FromLong(0); | 
|  | 2124 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2125 | e = p + PyString_GET_SIZE(self); | 
|  | 2126 | for (; p < e; p++) { | 
|  | 2127 | if (!isdigit(*p)) | 
|  | 2128 | return PyInt_FromLong(0); | 
|  | 2129 | } | 
|  | 2130 | return PyInt_FromLong(1); | 
|  | 2131 | } | 
|  | 2132 |  | 
|  | 2133 |  | 
|  | 2134 | static char islower__doc__[] = | 
|  | 2135 | "S.islower() -> int\n\ | 
|  | 2136 | \n\ | 
|  | 2137 | Return 1 if  all cased characters in S are lowercase and there is\n\ | 
|  | 2138 | at least one cased character in S, 0 otherwise."; | 
|  | 2139 |  | 
|  | 2140 | static PyObject* | 
|  | 2141 | string_islower(PyStringObject *self, PyObject *args) | 
|  | 2142 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2143 | register const unsigned char *p | 
|  | 2144 | = (unsigned char *) PyString_AS_STRING(self); | 
| Guido van Rossum | b8f820c | 2000-05-05 20:44:24 +0000 | [diff] [blame] | 2145 | register const unsigned char *e; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2146 | int cased; | 
|  | 2147 |  | 
|  | 2148 | if (!PyArg_NoArgs(args)) | 
|  | 2149 | return NULL; | 
|  | 2150 |  | 
|  | 2151 | /* Shortcut for single character strings */ | 
|  | 2152 | if (PyString_GET_SIZE(self) == 1) | 
|  | 2153 | return PyInt_FromLong(islower(*p) != 0); | 
|  | 2154 |  | 
| Marc-André Lemburg | 60bc809 | 2000-06-14 09:18:32 +0000 | [diff] [blame] | 2155 | /* Special case for empty strings */ | 
|  | 2156 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2157 | return PyInt_FromLong(0); | 
|  | 2158 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2159 | e = p + PyString_GET_SIZE(self); | 
|  | 2160 | cased = 0; | 
|  | 2161 | for (; p < e; p++) { | 
|  | 2162 | if (isupper(*p)) | 
|  | 2163 | return PyInt_FromLong(0); | 
|  | 2164 | else if (!cased && islower(*p)) | 
|  | 2165 | cased = 1; | 
|  | 2166 | } | 
|  | 2167 | return PyInt_FromLong(cased); | 
|  | 2168 | } | 
|  | 2169 |  | 
|  | 2170 |  | 
|  | 2171 | static char isupper__doc__[] = | 
|  | 2172 | "S.isupper() -> int\n\ | 
|  | 2173 | \n\ | 
|  | 2174 | Return 1 if  all cased characters in S are uppercase and there is\n\ | 
|  | 2175 | at least one cased character in S, 0 otherwise."; | 
|  | 2176 |  | 
|  | 2177 | static PyObject* | 
|  | 2178 | string_isupper(PyStringObject *self, PyObject *args) | 
|  | 2179 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2180 | register const unsigned char *p | 
|  | 2181 | = (unsigned char *) PyString_AS_STRING(self); | 
| Guido van Rossum | b8f820c | 2000-05-05 20:44:24 +0000 | [diff] [blame] | 2182 | register const unsigned char *e; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2183 | int cased; | 
|  | 2184 |  | 
|  | 2185 | if (!PyArg_NoArgs(args)) | 
|  | 2186 | return NULL; | 
|  | 2187 |  | 
|  | 2188 | /* Shortcut for single character strings */ | 
|  | 2189 | if (PyString_GET_SIZE(self) == 1) | 
|  | 2190 | return PyInt_FromLong(isupper(*p) != 0); | 
|  | 2191 |  | 
| Marc-André Lemburg | 60bc809 | 2000-06-14 09:18:32 +0000 | [diff] [blame] | 2192 | /* Special case for empty strings */ | 
|  | 2193 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2194 | return PyInt_FromLong(0); | 
|  | 2195 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2196 | e = p + PyString_GET_SIZE(self); | 
|  | 2197 | cased = 0; | 
|  | 2198 | for (; p < e; p++) { | 
|  | 2199 | if (islower(*p)) | 
|  | 2200 | return PyInt_FromLong(0); | 
|  | 2201 | else if (!cased && isupper(*p)) | 
|  | 2202 | cased = 1; | 
|  | 2203 | } | 
|  | 2204 | return PyInt_FromLong(cased); | 
|  | 2205 | } | 
|  | 2206 |  | 
|  | 2207 |  | 
|  | 2208 | static char istitle__doc__[] = | 
|  | 2209 | "S.istitle() -> int\n\ | 
|  | 2210 | \n\ | 
|  | 2211 | Return 1 if S is a titlecased string, i.e. uppercase characters\n\ | 
|  | 2212 | may only follow uncased characters and lowercase characters only cased\n\ | 
|  | 2213 | ones. Return 0 otherwise."; | 
|  | 2214 |  | 
|  | 2215 | static PyObject* | 
|  | 2216 | string_istitle(PyStringObject *self, PyObject *args) | 
|  | 2217 | { | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2218 | register const unsigned char *p | 
|  | 2219 | = (unsigned char *) PyString_AS_STRING(self); | 
| Guido van Rossum | b8f820c | 2000-05-05 20:44:24 +0000 | [diff] [blame] | 2220 | register const unsigned char *e; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2221 | int cased, previous_is_cased; | 
|  | 2222 |  | 
|  | 2223 | if (!PyArg_NoArgs(args)) | 
|  | 2224 | return NULL; | 
|  | 2225 |  | 
|  | 2226 | /* Shortcut for single character strings */ | 
|  | 2227 | if (PyString_GET_SIZE(self) == 1) | 
|  | 2228 | return PyInt_FromLong(isupper(*p) != 0); | 
|  | 2229 |  | 
| Marc-André Lemburg | 60bc809 | 2000-06-14 09:18:32 +0000 | [diff] [blame] | 2230 | /* Special case for empty strings */ | 
|  | 2231 | if (PyString_GET_SIZE(self) == 0) | 
|  | 2232 | return PyInt_FromLong(0); | 
|  | 2233 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2234 | e = p + PyString_GET_SIZE(self); | 
|  | 2235 | cased = 0; | 
|  | 2236 | previous_is_cased = 0; | 
|  | 2237 | for (; p < e; p++) { | 
| Guido van Rossum | b8f820c | 2000-05-05 20:44:24 +0000 | [diff] [blame] | 2238 | register const unsigned char ch = *p; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2239 |  | 
|  | 2240 | if (isupper(ch)) { | 
|  | 2241 | if (previous_is_cased) | 
|  | 2242 | return PyInt_FromLong(0); | 
|  | 2243 | previous_is_cased = 1; | 
|  | 2244 | cased = 1; | 
|  | 2245 | } | 
|  | 2246 | else if (islower(ch)) { | 
|  | 2247 | if (!previous_is_cased) | 
|  | 2248 | return PyInt_FromLong(0); | 
|  | 2249 | previous_is_cased = 1; | 
|  | 2250 | cased = 1; | 
|  | 2251 | } | 
|  | 2252 | else | 
|  | 2253 | previous_is_cased = 0; | 
|  | 2254 | } | 
|  | 2255 | return PyInt_FromLong(cased); | 
|  | 2256 | } | 
|  | 2257 |  | 
|  | 2258 |  | 
|  | 2259 | static char splitlines__doc__[] = | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2260 | "S.splitlines([keepends]]) -> list of strings\n\ | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2261 | \n\ | 
|  | 2262 | Return a list of the lines in S, breaking at line boundaries.\n\ | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2263 | Line breaks are not included in the resulting list unless keepends\n\ | 
|  | 2264 | is given and true."; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2265 |  | 
|  | 2266 | #define SPLIT_APPEND(data, left, right)					\ | 
|  | 2267 | str = PyString_FromStringAndSize(data + left, right - left);	\ | 
|  | 2268 | if (!str)							\ | 
|  | 2269 | goto onError;						\ | 
|  | 2270 | if (PyList_Append(list, str)) {					\ | 
|  | 2271 | Py_DECREF(str);						\ | 
|  | 2272 | goto onError;						\ | 
|  | 2273 | }								\ | 
|  | 2274 | else								\ | 
|  | 2275 | Py_DECREF(str); | 
|  | 2276 |  | 
|  | 2277 | static PyObject* | 
|  | 2278 | string_splitlines(PyStringObject *self, PyObject *args) | 
|  | 2279 | { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2280 | register int i; | 
|  | 2281 | register int j; | 
|  | 2282 | int len; | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2283 | int keepends = 0; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2284 | PyObject *list; | 
|  | 2285 | PyObject *str; | 
|  | 2286 | char *data; | 
|  | 2287 |  | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2288 | if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2289 | return NULL; | 
|  | 2290 |  | 
|  | 2291 | data = PyString_AS_STRING(self); | 
|  | 2292 | len = PyString_GET_SIZE(self); | 
|  | 2293 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2294 | list = PyList_New(0); | 
|  | 2295 | if (!list) | 
|  | 2296 | goto onError; | 
|  | 2297 |  | 
|  | 2298 | for (i = j = 0; i < len; ) { | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2299 | int eol; | 
|  | 2300 |  | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2301 | /* Find a line and append it */ | 
|  | 2302 | while (i < len && data[i] != '\n' && data[i] != '\r') | 
|  | 2303 | i++; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2304 |  | 
|  | 2305 | /* Skip the line break reading CRLF as one line break */ | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2306 | eol = i; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2307 | if (i < len) { | 
|  | 2308 | if (data[i] == '\r' && i + 1 < len && | 
|  | 2309 | data[i+1] == '\n') | 
|  | 2310 | i += 2; | 
|  | 2311 | else | 
|  | 2312 | i++; | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2313 | if (keepends) | 
|  | 2314 | eol = i; | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2315 | } | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2316 | SPLIT_APPEND(data, j, eol); | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2317 | j = i; | 
|  | 2318 | } | 
|  | 2319 | if (j < len) { | 
|  | 2320 | SPLIT_APPEND(data, j, len); | 
|  | 2321 | } | 
|  | 2322 |  | 
|  | 2323 | return list; | 
|  | 2324 |  | 
|  | 2325 | onError: | 
|  | 2326 | Py_DECREF(list); | 
|  | 2327 | return NULL; | 
|  | 2328 | } | 
|  | 2329 |  | 
|  | 2330 | #undef SPLIT_APPEND | 
|  | 2331 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2332 |  | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2333 | static PyMethodDef | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2334 | string_methods[] = { | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2335 | /* Counterparts of the obsolete stropmodule functions; except | 
|  | 2336 | string.maketrans(). */ | 
|  | 2337 | {"join",       (PyCFunction)string_join,       1, join__doc__}, | 
|  | 2338 | {"split",       (PyCFunction)string_split,       1, split__doc__}, | 
|  | 2339 | {"lower",      (PyCFunction)string_lower,      1, lower__doc__}, | 
|  | 2340 | {"upper",       (PyCFunction)string_upper,       1, upper__doc__}, | 
|  | 2341 | {"islower", (PyCFunction)string_islower, 0, islower__doc__}, | 
|  | 2342 | {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__}, | 
|  | 2343 | {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__}, | 
|  | 2344 | {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__}, | 
|  | 2345 | {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__}, | 
| Marc-André Lemburg | 4027f8f | 2000-07-05 09:47:46 +0000 | [diff] [blame] | 2346 | {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__}, | 
|  | 2347 | {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__}, | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2348 | {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__}, | 
|  | 2349 | {"count",      (PyCFunction)string_count,      1, count__doc__}, | 
|  | 2350 | {"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__}, | 
|  | 2351 | {"find",       (PyCFunction)string_find,       1, find__doc__}, | 
|  | 2352 | {"index",      (PyCFunction)string_index,      1, index__doc__}, | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2353 | {"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__}, | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2354 | {"replace",     (PyCFunction)string_replace,     1, replace__doc__}, | 
|  | 2355 | {"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__}, | 
|  | 2356 | {"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__}, | 
|  | 2357 | {"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__}, | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2358 | {"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__}, | 
|  | 2359 | {"strip",       (PyCFunction)string_strip,       1, strip__doc__}, | 
|  | 2360 | {"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__}, | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2361 | {"translate",   (PyCFunction)string_translate,   1, translate__doc__}, | 
|  | 2362 | {"title",       (PyCFunction)string_title,       1, title__doc__}, | 
|  | 2363 | {"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__}, | 
|  | 2364 | {"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__}, | 
|  | 2365 | {"center",      (PyCFunction)string_center,      1, center__doc__}, | 
| Marc-André Lemburg | 63f3d17 | 2000-07-06 11:29:01 +0000 | [diff] [blame] | 2366 | {"encode",      (PyCFunction)string_encode,      1, encode__doc__}, | 
| Guido van Rossum | 4c08d55 | 2000-03-10 22:55:18 +0000 | [diff] [blame] | 2367 | {"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__}, | 
|  | 2368 | {"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__}, | 
|  | 2369 | #if 0 | 
|  | 2370 | {"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__}, | 
|  | 2371 | #endif | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2372 | {NULL,     NULL}		     /* sentinel */ | 
|  | 2373 | }; | 
|  | 2374 |  | 
|  | 2375 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2376 | string_getattr(PyStringObject *s, char *name) | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2377 | { | 
|  | 2378 | return Py_FindMethod(string_methods, (PyObject*)s, name); | 
|  | 2379 | } | 
|  | 2380 |  | 
|  | 2381 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2382 | PyTypeObject PyString_Type = { | 
|  | 2383 | PyObject_HEAD_INIT(&PyType_Type) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2384 | 0, | 
|  | 2385 | "string", | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2386 | sizeof(PyStringObject), | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2387 | sizeof(char), | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2388 | (destructor)string_dealloc, /*tp_dealloc*/ | 
|  | 2389 | (printfunc)string_print, /*tp_print*/ | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 2390 | (getattrfunc)string_getattr,		/*tp_getattr*/ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2391 | 0,		/*tp_setattr*/ | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2392 | (cmpfunc)string_compare, /*tp_compare*/ | 
|  | 2393 | (reprfunc)string_repr, /*tp_repr*/ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2394 | 0,		/*tp_as_number*/ | 
|  | 2395 | &string_as_sequence,	/*tp_as_sequence*/ | 
|  | 2396 | 0,		/*tp_as_mapping*/ | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2397 | (hashfunc)string_hash, /*tp_hash*/ | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 2398 | 0,		/*tp_call*/ | 
| Guido van Rossum | 189f1df | 2001-05-01 16:51:53 +0000 | [diff] [blame] | 2399 | (reprfunc)string_str,	/*tp_str*/ | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 2400 | 0,		/*tp_getattro*/ | 
|  | 2401 | 0,		/*tp_setattro*/ | 
| Guido van Rossum | fdf95dd | 1997-05-05 22:15:02 +0000 | [diff] [blame] | 2402 | &string_as_buffer,	/*tp_as_buffer*/ | 
| Guido van Rossum | 1db7070 | 1998-10-08 02:18:52 +0000 | [diff] [blame] | 2403 | Py_TPFLAGS_DEFAULT,	/*tp_flags*/ | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 2404 | 0,		/*tp_doc*/ | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2405 | }; | 
|  | 2406 |  | 
|  | 2407 | void | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2408 | PyString_Concat(register PyObject **pv, register PyObject *w) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2409 | { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2410 | register PyObject *v; | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2411 | if (*pv == NULL) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2412 | return; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2413 | if (w == NULL || !PyString_Check(*pv)) { | 
|  | 2414 | Py_DECREF(*pv); | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2415 | *pv = NULL; | 
|  | 2416 | return; | 
|  | 2417 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2418 | v = string_concat((PyStringObject *) *pv, w); | 
|  | 2419 | Py_DECREF(*pv); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2420 | *pv = v; | 
|  | 2421 | } | 
|  | 2422 |  | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2423 | void | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2424 | PyString_ConcatAndDel(register PyObject **pv, register PyObject *w) | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2425 | { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2426 | PyString_Concat(pv, w); | 
|  | 2427 | Py_XDECREF(w); | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2428 | } | 
|  | 2429 |  | 
|  | 2430 |  | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2431 | /* The following function breaks the notion that strings are immutable: | 
|  | 2432 | it changes the size of a string.  We get away with this only if there | 
|  | 2433 | is only one module referencing the object.  You can also think of it | 
|  | 2434 | as creating a new string object and destroying the old one, only | 
|  | 2435 | more efficiently.  In any case, don't use this if the string may | 
|  | 2436 | already be known to some other part of the code... */ | 
|  | 2437 |  | 
|  | 2438 | int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2439 | _PyString_Resize(PyObject **pv, int newsize) | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2440 | { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2441 | register PyObject *v; | 
|  | 2442 | register PyStringObject *sv; | 
| Guido van Rossum | 921842f | 1990-11-18 17:30:23 +0000 | [diff] [blame] | 2443 | v = *pv; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2444 | if (!PyString_Check(v) || v->ob_refcnt != 1) { | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2445 | *pv = 0; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2446 | Py_DECREF(v); | 
|  | 2447 | PyErr_BadInternalCall(); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 2448 | return -1; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2449 | } | 
| Guido van Rossum | 921842f | 1990-11-18 17:30:23 +0000 | [diff] [blame] | 2450 | /* XXX UNREF/NEWREF interface should be more symmetrical */ | 
| Guido van Rossum | 441e4ab | 1996-05-23 22:46:51 +0000 | [diff] [blame] | 2451 | #ifdef Py_REF_DEBUG | 
| Guido van Rossum | 6f9e433 | 1995-03-29 16:57:48 +0000 | [diff] [blame] | 2452 | --_Py_RefTotal; | 
| Guido van Rossum | 921842f | 1990-11-18 17:30:23 +0000 | [diff] [blame] | 2453 | #endif | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2454 | _Py_ForgetReference(v); | 
|  | 2455 | *pv = (PyObject *) | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 2456 | PyObject_REALLOC((char *)v, | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2457 | sizeof(PyStringObject) + newsize * sizeof(char)); | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2458 | if (*pv == NULL) { | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 2459 | PyObject_DEL(v); | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2460 | PyErr_NoMemory(); | 
| Guido van Rossum | 2a9096b | 1990-10-21 22:15:08 +0000 | [diff] [blame] | 2461 | return -1; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2462 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2463 | _Py_NewReference(*pv); | 
|  | 2464 | sv = (PyStringObject *) *pv; | 
| Guido van Rossum | 921842f | 1990-11-18 17:30:23 +0000 | [diff] [blame] | 2465 | sv->ob_size = newsize; | 
|  | 2466 | sv->ob_sval[newsize] = '\0'; | 
| Guido van Rossum | 85a5fbb | 1990-10-14 12:07:46 +0000 | [diff] [blame] | 2467 | return 0; | 
|  | 2468 | } | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2469 |  | 
|  | 2470 | /* Helpers for formatstring */ | 
|  | 2471 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2472 | static PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2473 | getnextarg(PyObject *args, int arglen, int *p_argidx) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2474 | { | 
|  | 2475 | int argidx = *p_argidx; | 
|  | 2476 | if (argidx < arglen) { | 
|  | 2477 | (*p_argidx)++; | 
|  | 2478 | if (arglen < 0) | 
|  | 2479 | return args; | 
|  | 2480 | else | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2481 | return PyTuple_GetItem(args, argidx); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2482 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2483 | PyErr_SetString(PyExc_TypeError, | 
|  | 2484 | "not enough arguments for format string"); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2485 | return NULL; | 
|  | 2486 | } | 
|  | 2487 |  | 
/* Format flag bits.  Each bit stands for one flag character of a
 * format specifier; the character is shown next to its bit.
 */
#define F_LJUST	(1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK	(1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
|  | 2500 |  | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2501 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2502 | formatfloat(char *buf, size_t buflen, int flags, | 
|  | 2503 | int prec, int type, PyObject *v) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2504 | { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2505 | /* fmt = '%#.' + `prec` + `type` | 
|  | 2506 | worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2507 | char fmt[20]; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2508 | double x; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2509 | if (!PyArg_Parse(v, "d;float argument required", &x)) | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2510 | return -1; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2511 | if (prec < 0) | 
|  | 2512 | prec = 6; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2513 | if (type == 'f' && fabs(x)/1e25 >= 1e25) | 
|  | 2514 | type = 'g'; | 
|  | 2515 | sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type); | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2516 | /* worst case length calc to ensure no buffer overrun: | 
|  | 2517 | fmt = %#.<prec>g | 
|  | 2518 | buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2519 | for any double rep.) | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2520 | len = 1 + prec + 1 + 2 + 5 = 9 + prec | 
|  | 2521 | If prec=0 the effective precision is 1 (the leading digit is | 
|  | 2522 | always given), therefore increase by one to 10+prec. */ | 
|  | 2523 | if (buflen <= (size_t)10 + (size_t)prec) { | 
|  | 2524 | PyErr_SetString(PyExc_OverflowError, | 
| Fred Drake | 661ea26 | 2000-10-24 19:57:45 +0000 | [diff] [blame] | 2525 | "formatted float is too long (precision too large?)"); | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2526 | return -1; | 
|  | 2527 | } | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2528 | sprintf(buf, fmt, x); | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2529 | return strlen(buf); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2530 | } | 
|  | 2531 |  | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2532 | /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and | 
|  | 2533 | * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for | 
|  | 2534 | * Python's regular ints. | 
|  | 2535 | * Return value:  a new PyString*, or NULL if error. | 
|  | 2536 | *  .  *pbuf is set to point into it, | 
|  | 2537 | *     *plen set to the # of chars following that. | 
|  | 2538 | *     Caller must decref it when done using pbuf. | 
|  | 2539 | *     The string starting at *pbuf is of the form | 
|  | 2540 | *         "-"? ("0x" | "0X")? digit+ | 
|  | 2541 | *     "0x"/"0X" are present only for x and X conversions, with F_ALT | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2542 | *         set in flags.  The case of hex digits will be correct, | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2543 | *     There will be at least prec digits, zero-filled on the left if | 
|  | 2544 | *         necessary to get that many. | 
|  | 2545 | * val		object to be converted | 
|  | 2546 | * flags	bitmask of format flags; only F_ALT is looked at | 
|  | 2547 | * prec		minimum number of digits; 0-fill on left if needed | 
|  | 2548 | * type		a character in [duoxX]; u acts the same as d | 
|  | 2549 | * | 
|  | 2550 | * CAUTION:  o, x and X conversions on regular ints can never | 
|  | 2551 | * produce a '-' sign, but can for Python's unbounded ints. | 
|  | 2552 | */ | 
|  | 2553 | PyObject* | 
|  | 2554 | _PyString_FormatLong(PyObject *val, int flags, int prec, int type, | 
|  | 2555 | char **pbuf, int *plen) | 
|  | 2556 | { | 
|  | 2557 | PyObject *result = NULL; | 
|  | 2558 | char *buf; | 
|  | 2559 | int i; | 
|  | 2560 | int sign;	/* 1 if '-', else 0 */ | 
|  | 2561 | int len;	/* number of characters */ | 
|  | 2562 | int numdigits;	/* len == numnondigits + numdigits */ | 
|  | 2563 | int numnondigits = 0; | 
|  | 2564 |  | 
|  | 2565 | switch (type) { | 
|  | 2566 | case 'd': | 
|  | 2567 | case 'u': | 
|  | 2568 | result = val->ob_type->tp_str(val); | 
|  | 2569 | break; | 
|  | 2570 | case 'o': | 
|  | 2571 | result = val->ob_type->tp_as_number->nb_oct(val); | 
|  | 2572 | break; | 
|  | 2573 | case 'x': | 
|  | 2574 | case 'X': | 
|  | 2575 | numnondigits = 2; | 
|  | 2576 | result = val->ob_type->tp_as_number->nb_hex(val); | 
|  | 2577 | break; | 
|  | 2578 | default: | 
|  | 2579 | assert(!"'type' not in [duoxX]"); | 
|  | 2580 | } | 
|  | 2581 | if (!result) | 
|  | 2582 | return NULL; | 
|  | 2583 |  | 
|  | 2584 | /* To modify the string in-place, there can only be one reference. */ | 
|  | 2585 | if (result->ob_refcnt != 1) { | 
|  | 2586 | PyErr_BadInternalCall(); | 
|  | 2587 | return NULL; | 
|  | 2588 | } | 
|  | 2589 | buf = PyString_AsString(result); | 
|  | 2590 | len = PyString_Size(result); | 
|  | 2591 | if (buf[len-1] == 'L') { | 
|  | 2592 | --len; | 
|  | 2593 | buf[len] = '\0'; | 
|  | 2594 | } | 
|  | 2595 | sign = buf[0] == '-'; | 
|  | 2596 | numnondigits += sign; | 
|  | 2597 | numdigits = len - numnondigits; | 
|  | 2598 | assert(numdigits > 0); | 
|  | 2599 |  | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 2600 | /* Get rid of base marker unless F_ALT */ | 
|  | 2601 | if ((flags & F_ALT) == 0) { | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2602 | /* Need to skip 0x, 0X or 0. */ | 
|  | 2603 | int skipped = 0; | 
|  | 2604 | switch (type) { | 
|  | 2605 | case 'o': | 
|  | 2606 | assert(buf[sign] == '0'); | 
|  | 2607 | /* If 0 is only digit, leave it alone. */ | 
|  | 2608 | if (numdigits > 1) { | 
|  | 2609 | skipped = 1; | 
|  | 2610 | --numdigits; | 
|  | 2611 | } | 
|  | 2612 | break; | 
|  | 2613 | case 'x': | 
|  | 2614 | case 'X': | 
|  | 2615 | assert(buf[sign] == '0'); | 
|  | 2616 | assert(buf[sign + 1] == 'x'); | 
|  | 2617 | skipped = 2; | 
|  | 2618 | numnondigits -= 2; | 
|  | 2619 | break; | 
|  | 2620 | } | 
|  | 2621 | if (skipped) { | 
|  | 2622 | buf += skipped; | 
|  | 2623 | len -= skipped; | 
|  | 2624 | if (sign) | 
|  | 2625 | buf[0] = '-'; | 
|  | 2626 | } | 
|  | 2627 | assert(len == numnondigits + numdigits); | 
|  | 2628 | assert(numdigits > 0); | 
|  | 2629 | } | 
|  | 2630 |  | 
|  | 2631 | /* Fill with leading zeroes to meet minimum width. */ | 
|  | 2632 | if (prec > numdigits) { | 
|  | 2633 | PyObject *r1 = PyString_FromStringAndSize(NULL, | 
|  | 2634 | numnondigits + prec); | 
|  | 2635 | char *b1; | 
|  | 2636 | if (!r1) { | 
|  | 2637 | Py_DECREF(result); | 
|  | 2638 | return NULL; | 
|  | 2639 | } | 
|  | 2640 | b1 = PyString_AS_STRING(r1); | 
|  | 2641 | for (i = 0; i < numnondigits; ++i) | 
|  | 2642 | *b1++ = *buf++; | 
|  | 2643 | for (i = 0; i < prec - numdigits; i++) | 
|  | 2644 | *b1++ = '0'; | 
|  | 2645 | for (i = 0; i < numdigits; i++) | 
|  | 2646 | *b1++ = *buf++; | 
|  | 2647 | *b1 = '\0'; | 
|  | 2648 | Py_DECREF(result); | 
|  | 2649 | result = r1; | 
|  | 2650 | buf = PyString_AS_STRING(result); | 
|  | 2651 | len = numnondigits + prec; | 
|  | 2652 | } | 
|  | 2653 |  | 
|  | 2654 | /* Fix up case for hex conversions. */ | 
|  | 2655 | switch (type) { | 
|  | 2656 | case 'x': | 
|  | 2657 | /* Need to convert all upper case letters to lower case. */ | 
|  | 2658 | for (i = 0; i < len; i++) | 
|  | 2659 | if (buf[i] >= 'A' && buf[i] <= 'F') | 
|  | 2660 | buf[i] += 'a'-'A'; | 
|  | 2661 | break; | 
|  | 2662 | case 'X': | 
|  | 2663 | /* Need to convert 0x to 0X (and -0x to -0X). */ | 
|  | 2664 | if (buf[sign + 1] == 'x') | 
|  | 2665 | buf[sign + 1] = 'X'; | 
|  | 2666 | break; | 
|  | 2667 | } | 
|  | 2668 | *pbuf = buf; | 
|  | 2669 | *plen = len; | 
|  | 2670 | return result; | 
|  | 2671 | } | 
|  | 2672 |  | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2673 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2674 | formatint(char *buf, size_t buflen, int flags, | 
|  | 2675 | int prec, int type, PyObject *v) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2676 | { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2677 | /* fmt = '%#.' + `prec` + 'l' + `type` | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2678 | worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) | 
|  | 2679 | + 1 + 1 = 24 */ | 
|  | 2680 | char fmt[64];	/* plenty big enough! */ | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2681 | long x; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2682 | if (!PyArg_Parse(v, "l;int argument required", &x)) | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2683 | return -1; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2684 | if (prec < 0) | 
|  | 2685 | prec = 1; | 
|  | 2686 | sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type); | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2687 | /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal)) | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2688 | worst case buf = '0x' + [0-9]*prec, where prec >= 11 */ | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2689 | if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2690 | PyErr_SetString(PyExc_OverflowError, | 
| Fred Drake | 661ea26 | 2000-10-24 19:57:45 +0000 | [diff] [blame] | 2691 | "formatted integer is too long (precision too large?)"); | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2692 | return -1; | 
|  | 2693 | } | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2694 | sprintf(buf, fmt, x); | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 2695 | /* When converting 0 under %#x or %#X, C leaves off the base marker, | 
|  | 2696 | * but we want it (for consistency with other %#x conversions, and | 
|  | 2697 | * for consistency with Python's hex() function). | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2698 | * BUG 28-Apr-2001 tim:  At least two platform Cs (Metrowerks & | 
|  | 2699 | * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway. | 
|  | 2700 | * So add it only if the platform didn't already. | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 2701 | */ | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2702 | if (x == 0 && | 
|  | 2703 | (flags & F_ALT) && | 
|  | 2704 | (type == 'x' || type == 'X') && | 
|  | 2705 | buf[1] != (char)type)  /* this last always true under std C */ | 
|  | 2706 | { | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 2707 | memmove(buf+2, buf, strlen(buf) + 1); | 
|  | 2708 | buf[0] = '0'; | 
|  | 2709 | buf[1] = (char)type; | 
|  | 2710 | } | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2711 | return strlen(buf); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2712 | } | 
|  | 2713 |  | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2714 | static int | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2715 | formatchar(char *buf, size_t buflen, PyObject *v) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2716 | { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2717 | /* presume that the buffer is at least 2 characters long */ | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2718 | if (PyString_Check(v)) { | 
|  | 2719 | if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2720 | return -1; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2721 | } | 
|  | 2722 | else { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2723 | if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2724 | return -1; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2725 | } | 
|  | 2726 | buf[1] = '\0'; | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 2727 | return 1; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2728 | } | 
|  | 2729 |  | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2730 |  | 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so the cast binds as one unit
   wherever the macro is used (e.g. as an operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2740 |  | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2741 | PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 2742 | PyString_Format(PyObject *format, PyObject *args) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2743 | { | 
|  | 2744 | char *fmt, *res; | 
|  | 2745 | int fmtcnt, rescnt, reslen, arglen, argidx; | 
| Guido van Rossum | 993952b | 1996-05-21 22:44:20 +0000 | [diff] [blame] | 2746 | int args_owned = 0; | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 2747 | PyObject *result, *orig_args, *v, *w; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2748 | PyObject *dict = NULL; | 
|  | 2749 | if (format == NULL || !PyString_Check(format) || args == NULL) { | 
|  | 2750 | PyErr_BadInternalCall(); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2751 | return NULL; | 
|  | 2752 | } | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 2753 | orig_args = args; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2754 | fmt = PyString_AsString(format); | 
|  | 2755 | fmtcnt = PyString_Size(format); | 
| Guido van Rossum | 6ac258d | 1993-05-12 08:24:20 +0000 | [diff] [blame] | 2756 | reslen = rescnt = fmtcnt + 100; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2757 | result = PyString_FromStringAndSize((char *)NULL, reslen); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2758 | if (result == NULL) | 
|  | 2759 | return NULL; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2760 | res = PyString_AsString(result); | 
|  | 2761 | if (PyTuple_Check(args)) { | 
|  | 2762 | arglen = PyTuple_Size(args); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2763 | argidx = 0; | 
|  | 2764 | } | 
|  | 2765 | else { | 
|  | 2766 | arglen = -1; | 
|  | 2767 | argidx = -2; | 
|  | 2768 | } | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2769 | if (args->ob_type->tp_as_mapping) | 
|  | 2770 | dict = args; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2771 | while (--fmtcnt >= 0) { | 
|  | 2772 | if (*fmt != '%') { | 
|  | 2773 | if (--rescnt < 0) { | 
| Guido van Rossum | 6ac258d | 1993-05-12 08:24:20 +0000 | [diff] [blame] | 2774 | rescnt = fmtcnt + 100; | 
|  | 2775 | reslen += rescnt; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2776 | if (_PyString_Resize(&result, reslen) < 0) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2777 | return NULL; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2778 | res = PyString_AsString(result) | 
|  | 2779 | + reslen - rescnt; | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2780 | --rescnt; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2781 | } | 
|  | 2782 | *res++ = *fmt++; | 
|  | 2783 | } | 
|  | 2784 | else { | 
|  | 2785 | /* Got a format specifier */ | 
|  | 2786 | int flags = 0; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2787 | int width = -1; | 
|  | 2788 | int prec = -1; | 
| Guido van Rossum | 6938a29 | 1993-11-11 14:51:57 +0000 | [diff] [blame] | 2789 | int c = '\0'; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2790 | int fill; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2791 | PyObject *v = NULL; | 
|  | 2792 | PyObject *temp = NULL; | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2793 | char *pbuf; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2794 | int sign; | 
|  | 2795 | int len; | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2796 | char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */ | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 2797 | char *fmt_start = fmt; | 
| Marc-André Lemburg | 542fe56 | 2001-05-02 14:21:53 +0000 | [diff] [blame] | 2798 | int argidx_start = argidx; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2799 |  | 
| Guido van Rossum | da9c271 | 1996-12-05 21:58:58 +0000 | [diff] [blame] | 2800 | fmt++; | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2801 | if (*fmt == '(') { | 
|  | 2802 | char *keystart; | 
|  | 2803 | int keylen; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2804 | PyObject *key; | 
| Guido van Rossum | 045e688 | 1997-09-08 18:30:11 +0000 | [diff] [blame] | 2805 | int pcount = 1; | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2806 |  | 
|  | 2807 | if (dict == NULL) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2808 | PyErr_SetString(PyExc_TypeError, | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 2809 | "format requires a mapping"); | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2810 | goto error; | 
|  | 2811 | } | 
|  | 2812 | ++fmt; | 
|  | 2813 | --fmtcnt; | 
|  | 2814 | keystart = fmt; | 
| Guido van Rossum | 045e688 | 1997-09-08 18:30:11 +0000 | [diff] [blame] | 2815 | /* Skip over balanced parentheses */ | 
|  | 2816 | while (pcount > 0 && --fmtcnt >= 0) { | 
|  | 2817 | if (*fmt == ')') | 
|  | 2818 | --pcount; | 
|  | 2819 | else if (*fmt == '(') | 
|  | 2820 | ++pcount; | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2821 | fmt++; | 
| Guido van Rossum | 045e688 | 1997-09-08 18:30:11 +0000 | [diff] [blame] | 2822 | } | 
|  | 2823 | keylen = fmt - keystart - 1; | 
|  | 2824 | if (fmtcnt < 0 || pcount > 0) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2825 | PyErr_SetString(PyExc_ValueError, | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2826 | "incomplete format key"); | 
|  | 2827 | goto error; | 
|  | 2828 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2829 | key = PyString_FromStringAndSize(keystart, | 
|  | 2830 | keylen); | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2831 | if (key == NULL) | 
|  | 2832 | goto error; | 
| Guido van Rossum | 993952b | 1996-05-21 22:44:20 +0000 | [diff] [blame] | 2833 | if (args_owned) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2834 | Py_DECREF(args); | 
| Guido van Rossum | 993952b | 1996-05-21 22:44:20 +0000 | [diff] [blame] | 2835 | args_owned = 0; | 
|  | 2836 | } | 
|  | 2837 | args = PyObject_GetItem(dict, key); | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2838 | Py_DECREF(key); | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2839 | if (args == NULL) { | 
|  | 2840 | goto error; | 
|  | 2841 | } | 
| Guido van Rossum | 993952b | 1996-05-21 22:44:20 +0000 | [diff] [blame] | 2842 | args_owned = 1; | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2843 | arglen = -1; | 
|  | 2844 | argidx = -2; | 
|  | 2845 | } | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2846 | while (--fmtcnt >= 0) { | 
|  | 2847 | switch (c = *fmt++) { | 
|  | 2848 | case '-': flags |= F_LJUST; continue; | 
|  | 2849 | case '+': flags |= F_SIGN; continue; | 
|  | 2850 | case ' ': flags |= F_BLANK; continue; | 
|  | 2851 | case '#': flags |= F_ALT; continue; | 
|  | 2852 | case '0': flags |= F_ZERO; continue; | 
|  | 2853 | } | 
|  | 2854 | break; | 
|  | 2855 | } | 
|  | 2856 | if (c == '*') { | 
|  | 2857 | v = getnextarg(args, arglen, &argidx); | 
|  | 2858 | if (v == NULL) | 
|  | 2859 | goto error; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2860 | if (!PyInt_Check(v)) { | 
|  | 2861 | PyErr_SetString(PyExc_TypeError, | 
|  | 2862 | "* wants int"); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2863 | goto error; | 
|  | 2864 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2865 | width = PyInt_AsLong(v); | 
| Guido van Rossum | 98c9eba | 1999-06-07 15:12:32 +0000 | [diff] [blame] | 2866 | if (width < 0) { | 
|  | 2867 | flags |= F_LJUST; | 
|  | 2868 | width = -width; | 
|  | 2869 | } | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2870 | if (--fmtcnt >= 0) | 
|  | 2871 | c = *fmt++; | 
|  | 2872 | } | 
| Guido van Rossum | 9fa2c11 | 1995-02-10 17:00:37 +0000 | [diff] [blame] | 2873 | else if (c >= 0 && isdigit(c)) { | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2874 | width = c - '0'; | 
|  | 2875 | while (--fmtcnt >= 0) { | 
| Guido van Rossum | 9fa2c11 | 1995-02-10 17:00:37 +0000 | [diff] [blame] | 2876 | c = Py_CHARMASK(*fmt++); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2877 | if (!isdigit(c)) | 
|  | 2878 | break; | 
|  | 2879 | if ((width*10) / 10 != width) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2880 | PyErr_SetString( | 
|  | 2881 | PyExc_ValueError, | 
|  | 2882 | "width too big"); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2883 | goto error; | 
|  | 2884 | } | 
|  | 2885 | width = width*10 + (c - '0'); | 
|  | 2886 | } | 
|  | 2887 | } | 
|  | 2888 | if (c == '.') { | 
|  | 2889 | prec = 0; | 
|  | 2890 | if (--fmtcnt >= 0) | 
|  | 2891 | c = *fmt++; | 
|  | 2892 | if (c == '*') { | 
|  | 2893 | v = getnextarg(args, arglen, &argidx); | 
|  | 2894 | if (v == NULL) | 
|  | 2895 | goto error; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2896 | if (!PyInt_Check(v)) { | 
|  | 2897 | PyErr_SetString( | 
|  | 2898 | PyExc_TypeError, | 
|  | 2899 | "* wants int"); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2900 | goto error; | 
|  | 2901 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2902 | prec = PyInt_AsLong(v); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2903 | if (prec < 0) | 
|  | 2904 | prec = 0; | 
|  | 2905 | if (--fmtcnt >= 0) | 
|  | 2906 | c = *fmt++; | 
|  | 2907 | } | 
| Guido van Rossum | 9fa2c11 | 1995-02-10 17:00:37 +0000 | [diff] [blame] | 2908 | else if (c >= 0 && isdigit(c)) { | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2909 | prec = c - '0'; | 
|  | 2910 | while (--fmtcnt >= 0) { | 
| Guido van Rossum | 9fa2c11 | 1995-02-10 17:00:37 +0000 | [diff] [blame] | 2911 | c = Py_CHARMASK(*fmt++); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2912 | if (!isdigit(c)) | 
|  | 2913 | break; | 
|  | 2914 | if ((prec*10) / 10 != prec) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2915 | PyErr_SetString( | 
|  | 2916 | PyExc_ValueError, | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2917 | "prec too big"); | 
|  | 2918 | goto error; | 
|  | 2919 | } | 
|  | 2920 | prec = prec*10 + (c - '0'); | 
|  | 2921 | } | 
|  | 2922 | } | 
|  | 2923 | } /* prec */ | 
|  | 2924 | if (fmtcnt >= 0) { | 
|  | 2925 | if (c == 'h' || c == 'l' || c == 'L') { | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2926 | if (--fmtcnt >= 0) | 
|  | 2927 | c = *fmt++; | 
|  | 2928 | } | 
|  | 2929 | } | 
|  | 2930 | if (fmtcnt < 0) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2931 | PyErr_SetString(PyExc_ValueError, | 
|  | 2932 | "incomplete format"); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2933 | goto error; | 
|  | 2934 | } | 
|  | 2935 | if (c != '%') { | 
|  | 2936 | v = getnextarg(args, arglen, &argidx); | 
|  | 2937 | if (v == NULL) | 
|  | 2938 | goto error; | 
|  | 2939 | } | 
|  | 2940 | sign = 0; | 
|  | 2941 | fill = ' '; | 
|  | 2942 | switch (c) { | 
|  | 2943 | case '%': | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2944 | pbuf = "%"; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2945 | len = 1; | 
|  | 2946 | break; | 
|  | 2947 | case 's': | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 2948 | case 'r': | 
|  | 2949 | if (PyUnicode_Check(v)) { | 
|  | 2950 | fmt = fmt_start; | 
| Marc-André Lemburg | 542fe56 | 2001-05-02 14:21:53 +0000 | [diff] [blame] | 2951 | argidx = argidx_start; | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 2952 | goto unicode; | 
|  | 2953 | } | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2954 | if (c == 's') | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2955 | temp = PyObject_Str(v); | 
| Guido van Rossum | f0b7b04 | 2000-04-11 15:39:26 +0000 | [diff] [blame] | 2956 | else | 
|  | 2957 | temp = PyObject_Repr(v); | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 2958 | if (temp == NULL) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2959 | goto error; | 
| Guido van Rossum | 4a0144c | 1998-06-09 15:08:41 +0000 | [diff] [blame] | 2960 | if (!PyString_Check(temp)) { | 
|  | 2961 | PyErr_SetString(PyExc_TypeError, | 
|  | 2962 | "%s argument has non-string str()"); | 
|  | 2963 | goto error; | 
|  | 2964 | } | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 2965 | pbuf = PyString_AsString(temp); | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 2966 | len = PyString_Size(temp); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2967 | if (prec >= 0 && len > prec) | 
|  | 2968 | len = prec; | 
|  | 2969 | break; | 
|  | 2970 | case 'i': | 
|  | 2971 | case 'd': | 
|  | 2972 | case 'u': | 
|  | 2973 | case 'o': | 
|  | 2974 | case 'x': | 
|  | 2975 | case 'X': | 
|  | 2976 | if (c == 'i') | 
|  | 2977 | c = 'd'; | 
| Tim Peters | a3a3a03 | 2000-11-30 05:22:44 +0000 | [diff] [blame] | 2978 | if (PyLong_Check(v)) { | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2979 | temp = _PyString_FormatLong(v, flags, | 
|  | 2980 | prec, c, &pbuf, &len); | 
|  | 2981 | if (!temp) | 
|  | 2982 | goto error; | 
|  | 2983 | /* unbounded ints can always produce | 
|  | 2984 | a sign character! */ | 
|  | 2985 | sign = 1; | 
| Guido van Rossum | 4acdc23 | 1997-01-29 06:00:24 +0000 | [diff] [blame] | 2986 | } | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 2987 | else { | 
|  | 2988 | pbuf = formatbuf; | 
|  | 2989 | len = formatint(pbuf, sizeof(formatbuf), | 
|  | 2990 | flags, prec, c, v); | 
|  | 2991 | if (len < 0) | 
|  | 2992 | goto error; | 
|  | 2993 | /* only d conversion is signed */ | 
|  | 2994 | sign = c == 'd'; | 
|  | 2995 | } | 
|  | 2996 | if (flags & F_ZERO) | 
|  | 2997 | fill = '0'; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 2998 | break; | 
|  | 2999 | case 'e': | 
|  | 3000 | case 'E': | 
|  | 3001 | case 'f': | 
|  | 3002 | case 'g': | 
|  | 3003 | case 'G': | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 3004 | pbuf = formatbuf; | 
|  | 3005 | len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v); | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 3006 | if (len < 0) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3007 | goto error; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3008 | sign = 1; | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3009 | if (flags & F_ZERO) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3010 | fill = '0'; | 
|  | 3011 | break; | 
|  | 3012 | case 'c': | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 3013 | pbuf = formatbuf; | 
|  | 3014 | len = formatchar(pbuf, sizeof(formatbuf), v); | 
| Guido van Rossum | a04d47b | 1997-01-21 16:12:09 +0000 | [diff] [blame] | 3015 | if (len < 0) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3016 | goto error; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3017 | break; | 
|  | 3018 | default: | 
| Guido van Rossum | 045e688 | 1997-09-08 18:30:11 +0000 | [diff] [blame] | 3019 | PyErr_Format(PyExc_ValueError, | 
| Andrew M. Kuchling | 6ca8917 | 2000-12-15 13:07:46 +0000 | [diff] [blame] | 3020 | "unsupported format character '%c' (0x%x) " | 
|  | 3021 | "at index %i", | 
|  | 3022 | c, c, fmt - 1 - PyString_AsString(format)); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3023 | goto error; | 
|  | 3024 | } | 
|  | 3025 | if (sign) { | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 3026 | if (*pbuf == '-' || *pbuf == '+') { | 
|  | 3027 | sign = *pbuf++; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3028 | len--; | 
|  | 3029 | } | 
|  | 3030 | else if (flags & F_SIGN) | 
|  | 3031 | sign = '+'; | 
|  | 3032 | else if (flags & F_BLANK) | 
|  | 3033 | sign = ' '; | 
|  | 3034 | else | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3035 | sign = 0; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3036 | } | 
|  | 3037 | if (width < len) | 
|  | 3038 | width = len; | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3039 | if (rescnt < width + (sign != 0)) { | 
| Guido van Rossum | 6ac258d | 1993-05-12 08:24:20 +0000 | [diff] [blame] | 3040 | reslen -= rescnt; | 
|  | 3041 | rescnt = width + fmtcnt + 100; | 
|  | 3042 | reslen += rescnt; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3043 | if (_PyString_Resize(&result, reslen) < 0) | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3044 | return NULL; | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3045 | res = PyString_AsString(result) | 
|  | 3046 | + reslen - rescnt; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3047 | } | 
|  | 3048 | if (sign) { | 
| Guido van Rossum | 71e57d0 | 1993-11-11 15:03:51 +0000 | [diff] [blame] | 3049 | if (fill != ' ') | 
|  | 3050 | *res++ = sign; | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3051 | rescnt--; | 
|  | 3052 | if (width > len) | 
|  | 3053 | width--; | 
|  | 3054 | } | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3055 | if ((flags & F_ALT) && (c == 'x' || c == 'X')) { | 
|  | 3056 | assert(pbuf[0] == '0'); | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 3057 | assert(pbuf[1] == c); | 
|  | 3058 | if (fill != ' ') { | 
|  | 3059 | *res++ = *pbuf++; | 
|  | 3060 | *res++ = *pbuf++; | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3061 | } | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 3062 | rescnt -= 2; | 
|  | 3063 | width -= 2; | 
|  | 3064 | if (width < 0) | 
|  | 3065 | width = 0; | 
|  | 3066 | len -= 2; | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3067 | } | 
|  | 3068 | if (width > len && !(flags & F_LJUST)) { | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3069 | do { | 
|  | 3070 | --rescnt; | 
|  | 3071 | *res++ = fill; | 
|  | 3072 | } while (--width > len); | 
|  | 3073 | } | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3074 | if (fill == ' ') { | 
|  | 3075 | if (sign) | 
|  | 3076 | *res++ = sign; | 
|  | 3077 | if ((flags & F_ALT) && | 
| Tim Peters | fff5325 | 2001-04-12 18:38:48 +0000 | [diff] [blame] | 3078 | (c == 'x' || c == 'X')) { | 
|  | 3079 | assert(pbuf[0] == '0'); | 
|  | 3080 | assert(pbuf[1] == c); | 
| Tim Peters | 38fd5b6 | 2000-09-21 05:43:11 +0000 | [diff] [blame] | 3081 | *res++ = *pbuf++; | 
|  | 3082 | *res++ = *pbuf++; | 
|  | 3083 | } | 
|  | 3084 | } | 
| Marc-André Lemburg | f28dd83 | 2000-06-30 10:29:57 +0000 | [diff] [blame] | 3085 | memcpy(res, pbuf, len); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3086 | res += len; | 
|  | 3087 | rescnt -= len; | 
|  | 3088 | while (--width >= len) { | 
|  | 3089 | --rescnt; | 
|  | 3090 | *res++ = ' '; | 
|  | 3091 | } | 
| Guido van Rossum | 9fa2c11 | 1995-02-10 17:00:37 +0000 | [diff] [blame] | 3092 | if (dict && (argidx < arglen) && c != '%') { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3093 | PyErr_SetString(PyExc_TypeError, | 
| Guido van Rossum | 013142a | 1994-08-30 08:19:36 +0000 | [diff] [blame] | 3094 | "not all arguments converted"); | 
|  | 3095 | goto error; | 
|  | 3096 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3097 | Py_XDECREF(temp); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3098 | } /* '%' */ | 
|  | 3099 | } /* until end */ | 
| Guido van Rossum | caeaafc | 1995-02-27 10:13:23 +0000 | [diff] [blame] | 3100 | if (argidx < arglen && !dict) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3101 | PyErr_SetString(PyExc_TypeError, | 
|  | 3102 | "not all arguments converted"); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3103 | goto error; | 
|  | 3104 | } | 
| Guido van Rossum | 1109fbc | 1998-04-10 22:16:39 +0000 | [diff] [blame] | 3105 | if (args_owned) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3106 | Py_DECREF(args); | 
| Guido van Rossum | 1109fbc | 1998-04-10 22:16:39 +0000 | [diff] [blame] | 3107 | } | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3108 | _PyString_Resize(&result, reslen - rescnt); | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3109 | return result; | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3110 |  | 
|  | 3111 | unicode: | 
|  | 3112 | if (args_owned) { | 
|  | 3113 | Py_DECREF(args); | 
|  | 3114 | args_owned = 0; | 
|  | 3115 | } | 
| Marc-André Lemburg | 542fe56 | 2001-05-02 14:21:53 +0000 | [diff] [blame] | 3116 | /* Fiddle args right (remove the first argidx arguments) */ | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3117 | if (PyTuple_Check(orig_args) && argidx > 0) { | 
|  | 3118 | PyObject *v; | 
|  | 3119 | int n = PyTuple_GET_SIZE(orig_args) - argidx; | 
|  | 3120 | v = PyTuple_New(n); | 
|  | 3121 | if (v == NULL) | 
|  | 3122 | goto error; | 
|  | 3123 | while (--n >= 0) { | 
|  | 3124 | PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); | 
|  | 3125 | Py_INCREF(w); | 
|  | 3126 | PyTuple_SET_ITEM(v, n, w); | 
|  | 3127 | } | 
|  | 3128 | args = v; | 
|  | 3129 | } else { | 
|  | 3130 | Py_INCREF(orig_args); | 
|  | 3131 | args = orig_args; | 
|  | 3132 | } | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 3133 | args_owned = 1; | 
|  | 3134 | /* Take what we have of the result and let the Unicode formatting | 
|  | 3135 | function format the rest of the input. */ | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3136 | rescnt = res - PyString_AS_STRING(result); | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 3137 | if (_PyString_Resize(&result, rescnt)) | 
|  | 3138 | goto error; | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3139 | fmtcnt = PyString_GET_SIZE(format) - \ | 
|  | 3140 | (fmt - PyString_AS_STRING(format)); | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 3141 | format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); | 
|  | 3142 | if (format == NULL) | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3143 | goto error; | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 3144 | v = PyUnicode_Format(format, args); | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3145 | Py_DECREF(format); | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 3146 | if (v == NULL) | 
|  | 3147 | goto error; | 
|  | 3148 | /* Paste what we have (result) to what the Unicode formatting | 
|  | 3149 | function returned (v) and return the result (or error) */ | 
|  | 3150 | w = PyUnicode_Concat(result, v); | 
|  | 3151 | Py_DECREF(result); | 
|  | 3152 | Py_DECREF(v); | 
| Guido van Rossum | 90daa87 | 2000-04-10 13:47:21 +0000 | [diff] [blame] | 3153 | Py_DECREF(args); | 
| Marc-André Lemburg | 53f3d4a | 2000-10-07 08:54:09 +0000 | [diff] [blame] | 3154 | return w; | 
| Tim Peters | b3d8d1f | 2001-04-28 05:38:26 +0000 | [diff] [blame] | 3155 |  | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3156 | error: | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3157 | Py_DECREF(result); | 
| Guido van Rossum | 1109fbc | 1998-04-10 22:16:39 +0000 | [diff] [blame] | 3158 | if (args_owned) { | 
| Guido van Rossum | c0b618a | 1997-05-02 03:12:38 +0000 | [diff] [blame] | 3159 | Py_DECREF(args); | 
| Guido van Rossum | 1109fbc | 1998-04-10 22:16:39 +0000 | [diff] [blame] | 3160 | } | 
| Guido van Rossum | e537240 | 1993-03-16 12:15:04 +0000 | [diff] [blame] | 3161 | return NULL; | 
|  | 3162 | } | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 3163 |  | 
|  | 3164 |  | 
|  | 3165 | #ifdef INTERN_STRINGS | 
|  | 3166 |  | 
| Barry Warsaw | 4df762f | 2000-08-16 23:41:01 +0000 | [diff] [blame] | 3167 | /* This dictionary will leak at PyString_Fini() time.  That's acceptable | 
|  | 3168 | * because PyString_Fini() specifically frees interned strings that are | 
|  | 3169 | * only referenced by this dictionary.  The CVS log entry for revision 2.45 | 
|  | 3170 | * says: | 
|  | 3171 | * | 
|  | 3172 | *    Change the Fini function to only remove otherwise unreferenced | 
|  | 3173 | *    strings from the interned table.  There are references in | 
|  | 3174 | *    hard-to-find static variables all over the interpreter, and it's not | 
|  | 3175 | *    worth trying to get rid of all those; but "uninterning" isn't fair | 
|  | 3176 | *    either and may cause subtle failures later -- so we have to keep them | 
|  | 3177 | *    in the interned table. | 
|  | 3178 | */ | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 3179 | static PyObject *interned; | 
|  | 3180 |  | 
|  | 3181 | void | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 3182 | PyString_InternInPlace(PyObject **p) | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 3183 | { | 
|  | 3184 | register PyStringObject *s = (PyStringObject *)(*p); | 
|  | 3185 | PyObject *t; | 
|  | 3186 | if (s == NULL || !PyString_Check(s)) | 
|  | 3187 | Py_FatalError("PyString_InternInPlace: strings only please!"); | 
|  | 3188 | if ((t = s->ob_sinterned) != NULL) { | 
|  | 3189 | if (t == (PyObject *)s) | 
|  | 3190 | return; | 
|  | 3191 | Py_INCREF(t); | 
|  | 3192 | *p = t; | 
|  | 3193 | Py_DECREF(s); | 
|  | 3194 | return; | 
|  | 3195 | } | 
|  | 3196 | if (interned == NULL) { | 
|  | 3197 | interned = PyDict_New(); | 
|  | 3198 | if (interned == NULL) | 
|  | 3199 | return; | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 3200 | } | 
|  | 3201 | if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) { | 
|  | 3202 | Py_INCREF(t); | 
|  | 3203 | *p = s->ob_sinterned = t; | 
|  | 3204 | Py_DECREF(s); | 
|  | 3205 | return; | 
|  | 3206 | } | 
|  | 3207 | t = (PyObject *)s; | 
|  | 3208 | if (PyDict_SetItem(interned, t, t) == 0) { | 
|  | 3209 | s->ob_sinterned = t; | 
|  | 3210 | return; | 
|  | 3211 | } | 
|  | 3212 | PyErr_Clear(); | 
|  | 3213 | } | 
|  | 3214 |  | 
|  | 3215 |  | 
|  | 3216 | PyObject * | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 3217 | PyString_InternFromString(const char *cp) | 
| Guido van Rossum | 2a61e74 | 1997-01-18 07:55:05 +0000 | [diff] [blame] | 3218 | { | 
|  | 3219 | PyObject *s = PyString_FromString(cp); | 
|  | 3220 | if (s == NULL) | 
|  | 3221 | return NULL; | 
|  | 3222 | PyString_InternInPlace(&s); | 
|  | 3223 | return s; | 
|  | 3224 | } | 
|  | 3225 |  | 
|  | 3226 | #endif | 
| Guido van Rossum | 8cf0476 | 1997-08-02 02:57:45 +0000 | [diff] [blame] | 3227 |  | 
|  | 3228 | void | 
| Fred Drake | ba09633 | 2000-07-09 07:04:36 +0000 | [diff] [blame] | 3229 | PyString_Fini(void) | 
| Guido van Rossum | 8cf0476 | 1997-08-02 02:57:45 +0000 | [diff] [blame] | 3230 | { | 
|  | 3231 | int i; | 
| Guido van Rossum | 8cf0476 | 1997-08-02 02:57:45 +0000 | [diff] [blame] | 3232 | for (i = 0; i < UCHAR_MAX + 1; i++) { | 
|  | 3233 | Py_XDECREF(characters[i]); | 
|  | 3234 | characters[i] = NULL; | 
|  | 3235 | } | 
|  | 3236 | #ifndef DONT_SHARE_SHORT_STRINGS | 
|  | 3237 | Py_XDECREF(nullstring); | 
|  | 3238 | nullstring = NULL; | 
|  | 3239 | #endif | 
| Guido van Rossum | 971a7aa | 1997-08-05 02:15:12 +0000 | [diff] [blame] | 3240 | #ifdef INTERN_STRINGS | 
|  | 3241 | if (interned) { | 
|  | 3242 | int pos, changed; | 
|  | 3243 | PyObject *key, *value; | 
|  | 3244 | do { | 
|  | 3245 | changed = 0; | 
|  | 3246 | pos = 0; | 
|  | 3247 | while (PyDict_Next(interned, &pos, &key, &value)) { | 
|  | 3248 | if (key->ob_refcnt == 2 && key == value) { | 
|  | 3249 | PyDict_DelItem(interned, key); | 
|  | 3250 | changed = 1; | 
|  | 3251 | } | 
|  | 3252 | } | 
|  | 3253 | } while (changed); | 
|  | 3254 | } | 
|  | 3255 | #endif | 
| Guido van Rossum | 8cf0476 | 1997-08-02 02:57:45 +0000 | [diff] [blame] | 3256 | } | 
| Barry Warsaw | a903ad98 | 2001-02-23 16:40:48 +0000 | [diff] [blame] | 3257 |  | 
|  | 3258 | #ifdef INTERN_STRINGS | 
|  | 3259 | void _Py_ReleaseInternedStrings(void) | 
|  | 3260 | { | 
|  | 3261 | if (interned) { | 
| Guido van Rossum | 59d1d2b | 2001-04-20 19:13:02 +0000 | [diff] [blame] | 3262 | fprintf(stderr, "releasing interned strings\n"); | 
|  | 3263 | PyDict_Clear(interned); | 
| Barry Warsaw | a903ad98 | 2001-02-23 16:40:48 +0000 | [diff] [blame] | 3264 | Py_DECREF(interned); | 
|  | 3265 | interned = NULL; | 
|  | 3266 | } | 
|  | 3267 | } | 
|  | 3268 | #endif /* INTERN_STRINGS */ |