blob: f3063cf7800adcf62ac091874444c7f61f12f2a4 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* String object implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
Guido van Rossum013142a1994-08-30 08:19:36 +000029#include <ctype.h>
30
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000031#ifdef COUNT_ALLOCS
32int null_strings, one_strings;
33#endif
34
Guido van Rossum03093a21994-09-28 15:51:32 +000035#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036#include <limits.h>
37#else
38#ifndef UCHAR_MAX
39#define UCHAR_MAX 255
40#endif
41#endif
42
43static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000046#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047
48/*
49 Newsizedstringobject() and newstringobject() try in certain cases
50 to share string objects. When the size of the string is zero,
51 these routines always return a pointer to the same string object;
52 when the size is one, they return a pointer to an already existing
53 object if the contents of the string is known. For
54 newstringobject() this is always the case, for
55 newsizedstringobject() this is the case when the first argument in
56 not NULL.
57 A common practice to allocate a string and then fill it in or
58 change it must be done carefully. It is only allowed to change the
59 contents of the string if the obect was gotten from
60 newsizedstringobject() with a NULL first argument, because in the
61 future these routines may try to do even more sharing of objects.
62*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063object *
64newsizedstringobject(str, size)
65 char *str;
66 int size;
67{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000069#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 if (size == 0 && (op = nullstring) != NULL) {
71#ifdef COUNT_ALLOCS
72 null_strings++;
73#endif
74 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000075 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000076 }
77 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
78#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000084#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
88 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 op->ob_type = &Stringtype;
90 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091#ifdef CACHE_HASH
92 op->ob_shash = -1;
93#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +000094 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000095 if (str != NULL)
96 memcpy(op->ob_sval, str, size);
97 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 if (size == 0) {
100 nullstring = op;
101 INCREF(op);
102 } else if (size == 1 && str != NULL) {
103 characters[*str & UCHAR_MAX] = op;
104 INCREF(op);
105 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000106#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107 return (object *) op;
108}
109
110object *
111newstringobject(str)
112 char *str;
113{
114 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000116#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
121 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000122 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
128 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000129 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000131#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000134 if (op == NULL)
135 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000136 op->ob_type = &Stringtype;
137 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138#ifdef CACHE_HASH
139 op->ob_shash = -1;
140#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000141 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000142 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000143#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
145 nullstring = op;
146 INCREF(op);
147 } else if (size == 1) {
148 characters[*str & UCHAR_MAX] = op;
149 INCREF(op);
150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152 return (object *) op;
153}
154
Guido van Rossum234f9421993-06-17 12:35:49 +0000155static void
Guido van Rossume5372401993-03-16 12:15:04 +0000156string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000157 object *op;
158{
159 DEL(op);
160}
161
Guido van Rossumd7047b31995-01-02 19:07:15 +0000162int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000163getstringsize(op)
164 register object *op;
165{
166 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000167 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000168 return -1;
169 }
170 return ((stringobject *)op) -> ob_size;
171}
172
173/*const*/ char *
174getstringvalue(op)
175 register object *op;
176{
177 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000178 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 return NULL;
180 }
181 return ((stringobject *)op) -> ob_sval;
182}
183
184/* Methods */
185
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000186static int
Guido van Rossume5372401993-03-16 12:15:04 +0000187string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000188 stringobject *op;
189 FILE *fp;
190 int flags;
191{
192 int i;
193 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000194 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000195 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000196 if (flags & PRINT_RAW) {
197 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000198 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000199 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200
201 /* figure out which quote to use; single is prefered */
202 quote = '\'';
203 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
204 quote = '"';
205
206 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000207 for (i = 0; i < op->ob_size; i++) {
208 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000209 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 fprintf(fp, "\\%c", c);
211 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000212 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000214 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000217 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218}
219
220static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000221string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222 register stringobject *op;
223{
224 /* XXX overflow? */
225 int newsize = 2 + 4 * op->ob_size * sizeof(char);
226 object *v = newsizedstringobject((char *)NULL, newsize);
227 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000228 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 }
230 else {
231 register int i;
232 register char c;
233 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000234 int quote;
235
236 /* figure out which quote to use; single is prefered */
237 quote = '\'';
238 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
239 quote = '"';
240
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000241 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000242 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 for (i = 0; i < op->ob_size; i++) {
244 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000245 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000246 *p++ = '\\', *p++ = c;
247 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000248 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000249 while (*p != '\0')
250 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000251 }
252 else
253 *p++ = c;
254 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 *p = '\0';
257 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000258 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262static int
Guido van Rossume5372401993-03-16 12:15:04 +0000263string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 stringobject *a;
265{
266 return a->ob_size;
267}
268
269static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000270string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271 register stringobject *a;
272 register object *bb;
273{
274 register unsigned int size;
275 register stringobject *op;
276 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000277 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278 return NULL;
279 }
280#define b ((stringobject *)bb)
281 /* Optimize cases with empty left or right operand */
282 if (a->ob_size == 0) {
283 INCREF(bb);
284 return bb;
285 }
286 if (b->ob_size == 0) {
287 INCREF(a);
288 return (object *)a;
289 }
290 size = a->ob_size + b->ob_size;
291 op = (stringobject *)
292 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000293 if (op == NULL)
294 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000295 op->ob_type = &Stringtype;
296 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000297#ifdef CACHE_HASH
298 op->ob_shash = -1;
299#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000300 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000301 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
302 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
303 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304 return (object *) op;
305#undef b
306}
307
308static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000309string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 register stringobject *a;
311 register int n;
312{
313 register int i;
314 register unsigned int size;
315 register stringobject *op;
316 if (n < 0)
317 n = 0;
318 size = a->ob_size * n;
319 if (size == a->ob_size) {
320 INCREF(a);
321 return (object *)a;
322 }
323 op = (stringobject *)
324 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000325 if (op == NULL)
326 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000327 op->ob_type = &Stringtype;
328 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000329#ifdef CACHE_HASH
330 op->ob_shash = -1;
331#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000332 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000333 for (i = 0; i < size; i += a->ob_size)
334 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
335 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 return (object *) op;
337}
338
339/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
340
341static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000342string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 register stringobject *a;
344 register int i, j; /* May be negative! */
345{
346 if (i < 0)
347 i = 0;
348 if (j < 0)
349 j = 0; /* Avoid signed/unsigned bug in next line */
350 if (j > a->ob_size)
351 j = a->ob_size;
352 if (i == 0 && j == a->ob_size) { /* It's the same as a */
353 INCREF(a);
354 return (object *)a;
355 }
356 if (j < i)
357 j = i;
358 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
359}
360
361static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000362string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 stringobject *a;
364 register int i;
365{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000366 int c;
367 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000369 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 return NULL;
371 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000372 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000373 v = (object *) characters[c];
374#ifdef COUNT_ALLOCS
375 if (v != NULL)
376 one_strings++;
377#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000378 if (v == NULL) {
379 v = newsizedstringobject((char *)NULL, 1);
380 if (v == NULL)
381 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000382 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000383 ((stringobject *)v)->ob_sval[0] = c;
384 }
385 INCREF(v);
386 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387}
388
389static int
Guido van Rossume5372401993-03-16 12:15:04 +0000390string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 stringobject *a, *b;
392{
Guido van Rossum253919f1991-02-13 23:18:39 +0000393 int len_a = a->ob_size, len_b = b->ob_size;
394 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000395 int cmp;
396 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000397 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000398 if (cmp == 0)
399 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
400 if (cmp != 0)
401 return cmp;
402 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000403 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404}
405
Guido van Rossum9bfef441993-03-29 10:43:31 +0000406static long
407string_hash(a)
408 stringobject *a;
409{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000410 register int len;
411 register unsigned char *p;
412 register long x;
413
414#ifdef CACHE_HASH
415 if (a->ob_shash != -1)
416 return a->ob_shash;
417#endif
418 len = a->ob_size;
419 p = (unsigned char *) a->ob_sval;
420 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000421 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000422 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000423 x ^= a->ob_size;
424 if (x == -1)
425 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000426#ifdef CACHE_HASH
427 a->ob_shash = x;
428#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000429 return x;
430}
431
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000433 (inquiry)string_length, /*sq_length*/
434 (binaryfunc)string_concat, /*sq_concat*/
435 (intargfunc)string_repeat, /*sq_repeat*/
436 (intargfunc)string_item, /*sq_item*/
437 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000438 0, /*sq_ass_item*/
439 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440};
441
442typeobject Stringtype = {
443 OB_HEAD_INIT(&Typetype)
444 0,
445 "string",
446 sizeof(stringobject),
447 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000448 (destructor)string_dealloc, /*tp_dealloc*/
449 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450 0, /*tp_getattr*/
451 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000452 (cmpfunc)string_compare, /*tp_compare*/
453 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 0, /*tp_as_number*/
455 &string_as_sequence, /*tp_as_sequence*/
456 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000457 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458};
459
460void
461joinstring(pv, w)
462 register object **pv;
463 register object *w;
464{
465 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000466 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000468 if (w == NULL || !is_stringobject(*pv)) {
469 DECREF(*pv);
470 *pv = NULL;
471 return;
472 }
Guido van Rossume5372401993-03-16 12:15:04 +0000473 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474 DECREF(*pv);
475 *pv = v;
476}
477
Guido van Rossum013142a1994-08-30 08:19:36 +0000478void
479joinstring_decref(pv, w)
480 register object **pv;
481 register object *w;
482{
483 joinstring(pv, w);
484 XDECREF(w);
485}
486
487
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000488/* The following function breaks the notion that strings are immutable:
489 it changes the size of a string. We get away with this only if there
490 is only one module referencing the object. You can also think of it
491 as creating a new string object and destroying the old one, only
492 more efficiently. In any case, don't use this if the string may
493 already be known to some other part of the code... */
494
495int
496resizestring(pv, newsize)
497 object **pv;
498 int newsize;
499{
Guido van Rossum921842f1990-11-18 17:30:23 +0000500 register object *v;
501 register stringobject *sv;
502 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 if (!is_stringobject(v) || v->ob_refcnt != 1) {
504 *pv = 0;
505 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000506 err_badcall();
507 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000509 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +0000510#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +0000511 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +0000512#endif
513 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514 *pv = (object *)
515 realloc((char *)v,
516 sizeof(stringobject) + newsize * sizeof(char));
517 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000518 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000519 err_nomem();
520 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000522 NEWREF(*pv);
523 sv = (stringobject *) *pv;
524 sv->ob_size = newsize;
525 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526 return 0;
527}
Guido van Rossume5372401993-03-16 12:15:04 +0000528
529/* Helpers for formatstring */
530
531static object *
532getnextarg(args, arglen, p_argidx)
533 object *args;
534 int arglen;
535 int *p_argidx;
536{
537 int argidx = *p_argidx;
538 if (argidx < arglen) {
539 (*p_argidx)++;
540 if (arglen < 0)
541 return args;
542 else
543 return gettupleitem(args, argidx);
544 }
545 err_setstr(TypeError, "not enough arguments for format string");
546 return NULL;
547}
548
549#define F_LJUST (1<<0)
550#define F_SIGN (1<<1)
551#define F_BLANK (1<<2)
552#define F_ALT (1<<3)
553#define F_ZERO (1<<4)
554
555extern double fabs PROTO((double));
556
557static char *
558formatfloat(flags, prec, type, v)
559 int flags;
560 int prec;
561 int type;
562 object *v;
563{
564 char fmt[20];
565 static char buf[120];
566 double x;
567 if (!getargs(v, "d;float argument required", &x))
568 return NULL;
569 if (prec < 0)
570 prec = 6;
571 if (prec > 50)
572 prec = 50; /* Arbitrary limitation */
573 if (type == 'f' && fabs(x)/1e25 >= 1e25)
574 type = 'g';
575 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
576 sprintf(buf, fmt, x);
577 return buf;
578}
579
580static char *
581formatint(flags, prec, type, v)
582 int flags;
583 int prec;
584 int type;
585 object *v;
586{
587 char fmt[20];
588 static char buf[50];
589 long x;
590 if (!getargs(v, "l;int argument required", &x))
591 return NULL;
592 if (prec < 0)
593 prec = 1;
594 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
595 sprintf(buf, fmt, x);
596 return buf;
597}
598
599static char *
600formatchar(v)
601 object *v;
602{
603 static char buf[2];
604 if (is_stringobject(v)) {
605 if (!getargs(v, "c;%c requires int or char", &buf[0]))
606 return NULL;
607 }
608 else {
609 if (!getargs(v, "b;%c requires int or char", &buf[0]))
610 return NULL;
611 }
612 buf[1] = '\0';
613 return buf;
614}
615
Guido van Rossum013142a1994-08-30 08:19:36 +0000616
Guido van Rossume5372401993-03-16 12:15:04 +0000617/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
618
619object *
620formatstring(format, args)
621 object *format;
622 object *args;
623{
624 char *fmt, *res;
625 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +0000626 int args_owned = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000627 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000628 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000629 if (format == NULL || !is_stringobject(format) || args == NULL) {
630 err_badcall();
631 return NULL;
632 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000633 fmt = getstringvalue(format);
634 fmtcnt = getstringsize(format);
635 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000636 result = newsizedstringobject((char *)NULL, reslen);
637 if (result == NULL)
638 return NULL;
639 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000640 if (is_tupleobject(args)) {
641 arglen = gettuplesize(args);
642 argidx = 0;
643 }
644 else {
645 arglen = -1;
646 argidx = -2;
647 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000648 if (args->ob_type->tp_as_mapping)
649 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000650 while (--fmtcnt >= 0) {
651 if (*fmt != '%') {
652 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000653 rescnt = fmtcnt + 100;
654 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000655 if (resizestring(&result, reslen) < 0)
656 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000657 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000658 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000659 }
660 *res++ = *fmt++;
661 }
662 else {
663 /* Got a format specifier */
664 int flags = 0;
665 char *fmtstart = fmt++;
666 int width = -1;
667 int prec = -1;
668 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000669 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000670 int fill;
671 object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000672 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000673 char *buf;
674 int sign;
675 int len;
Guido van Rossum013142a1994-08-30 08:19:36 +0000676 if (*fmt == '(') {
677 char *keystart;
678 int keylen;
679 object *key;
680
681 if (dict == NULL) {
682 err_setstr(TypeError,
683 "format requires a mapping");
684 goto error;
685 }
686 ++fmt;
687 --fmtcnt;
688 keystart = fmt;
689 while (--fmtcnt >= 0 && *fmt != ')')
690 fmt++;
691 keylen = fmt - keystart;
692 ++fmt;
693 if (fmtcnt < 0) {
694 err_setstr(ValueError,
695 "incomplete format key");
696 goto error;
697 }
698 key = newsizedstringobject(keystart, keylen);
699 if (key == NULL)
700 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +0000701 if (args_owned) {
702 DECREF(args);
703 args_owned = 0;
704 }
705 args = PyObject_GetItem(dict, key);
Guido van Rossum013142a1994-08-30 08:19:36 +0000706 DECREF(key);
707 if (args == NULL) {
708 goto error;
709 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000710 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +0000711 arglen = -1;
712 argidx = -2;
713 }
Guido van Rossume5372401993-03-16 12:15:04 +0000714 while (--fmtcnt >= 0) {
715 switch (c = *fmt++) {
716 case '-': flags |= F_LJUST; continue;
717 case '+': flags |= F_SIGN; continue;
718 case ' ': flags |= F_BLANK; continue;
719 case '#': flags |= F_ALT; continue;
720 case '0': flags |= F_ZERO; continue;
721 }
722 break;
723 }
724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!is_intobject(v)) {
729 err_setstr(TypeError, "* wants int");
730 goto error;
731 }
732 width = getintvalue(v);
733 if (width < 0)
734 width = 0;
735 if (--fmtcnt >= 0)
736 c = *fmt++;
737 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000738 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000739 width = c - '0';
740 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000741 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000742 if (!isdigit(c))
743 break;
744 if ((width*10) / 10 != width) {
745 err_setstr(ValueError,
746 "width too big");
747 goto error;
748 }
749 width = width*10 + (c - '0');
750 }
751 }
752 if (c == '.') {
753 prec = 0;
754 if (--fmtcnt >= 0)
755 c = *fmt++;
756 if (c == '*') {
757 v = getnextarg(args, arglen, &argidx);
758 if (v == NULL)
759 goto error;
760 if (!is_intobject(v)) {
761 err_setstr(TypeError,
762 "* wants int");
763 goto error;
764 }
765 prec = getintvalue(v);
766 if (prec < 0)
767 prec = 0;
768 if (--fmtcnt >= 0)
769 c = *fmt++;
770 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000771 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000772 prec = c - '0';
773 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000774 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000775 if (!isdigit(c))
776 break;
777 if ((prec*10) / 10 != prec) {
778 err_setstr(ValueError,
779 "prec too big");
780 goto error;
781 }
782 prec = prec*10 + (c - '0');
783 }
784 }
785 } /* prec */
786 if (fmtcnt >= 0) {
787 if (c == 'h' || c == 'l' || c == 'L') {
788 size = c;
789 if (--fmtcnt >= 0)
790 c = *fmt++;
791 }
792 }
793 if (fmtcnt < 0) {
794 err_setstr(ValueError, "incomplete format");
795 goto error;
796 }
797 if (c != '%') {
798 v = getnextarg(args, arglen, &argidx);
799 if (v == NULL)
800 goto error;
801 }
802 sign = 0;
803 fill = ' ';
804 switch (c) {
805 case '%':
806 buf = "%";
807 len = 1;
808 break;
809 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000810 temp = strobject(v);
811 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000812 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000813 buf = getstringvalue(temp);
814 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000815 if (prec >= 0 && len > prec)
816 len = prec;
817 break;
818 case 'i':
819 case 'd':
820 case 'u':
821 case 'o':
822 case 'x':
823 case 'X':
824 if (c == 'i')
825 c = 'd';
826 buf = formatint(flags, prec, c, v);
827 if (buf == NULL)
828 goto error;
829 len = strlen(buf);
830 sign = (c == 'd');
831 if (flags&F_ZERO)
832 fill = '0';
833 break;
834 case 'e':
835 case 'E':
836 case 'f':
837 case 'g':
838 case 'G':
839 buf = formatfloat(flags, prec, c, v);
840 if (buf == NULL)
841 goto error;
842 len = strlen(buf);
843 sign = 1;
844 if (flags&F_ZERO)
845 fill = '0';
846 break;
847 case 'c':
848 buf = formatchar(v);
849 if (buf == NULL)
850 goto error;
Guido van Rossum6938a291993-11-11 14:51:57 +0000851 len = 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000852 break;
853 default:
854 err_setstr(ValueError,
855 "unsupported format character");
856 goto error;
857 }
858 if (sign) {
859 if (*buf == '-' || *buf == '+') {
860 sign = *buf++;
861 len--;
862 }
863 else if (flags & F_SIGN)
864 sign = '+';
865 else if (flags & F_BLANK)
866 sign = ' ';
867 else
868 sign = '\0';
869 }
870 if (width < len)
871 width = len;
872 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000873 reslen -= rescnt;
874 rescnt = width + fmtcnt + 100;
875 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000876 if (resizestring(&result, reslen) < 0)
877 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000878 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000879 }
880 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000881 if (fill != ' ')
882 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000883 rescnt--;
884 if (width > len)
885 width--;
886 }
887 if (width > len && !(flags&F_LJUST)) {
888 do {
889 --rescnt;
890 *res++ = fill;
891 } while (--width > len);
892 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000893 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000894 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000895 memcpy(res, buf, len);
896 res += len;
897 rescnt -= len;
898 while (--width >= len) {
899 --rescnt;
900 *res++ = ' ';
901 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000902 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000903 err_setstr(TypeError,
904 "not all arguments converted");
905 goto error;
906 }
907 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000908 } /* '%' */
909 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +0000910 if (argidx < arglen && !dict) {
Guido van Rossume5372401993-03-16 12:15:04 +0000911 err_setstr(TypeError, "not all arguments converted");
912 goto error;
913 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000914 if (args_owned)
915 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000916 resizestring(&result, reslen - rescnt);
917 return result;
918 error:
919 DECREF(result);
Guido van Rossum993952b1996-05-21 22:44:20 +0000920 if (args_owned)
921 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000922 return NULL;
923}