blob: 48c246a1e5e01f2e073e6c5a6127b0872256506e [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* String object implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
Guido van Rossum013142a1994-08-30 08:19:36 +000029#include <ctype.h>
30
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000031#ifdef COUNT_ALLOCS
32int null_strings, one_strings;
33#endif
34
Guido van Rossum03093a21994-09-28 15:51:32 +000035#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036#include <limits.h>
37#else
38#ifndef UCHAR_MAX
39#define UCHAR_MAX 255
40#endif
41#endif
42
43static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000046#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063object *
64newsizedstringobject(str, size)
65 char *str;
66 int size;
67{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000069#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 if (size == 0 && (op = nullstring) != NULL) {
71#ifdef COUNT_ALLOCS
72 null_strings++;
73#endif
74 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000075 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000076 }
77 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
78#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000084#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
88 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 op->ob_type = &Stringtype;
90 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091#ifdef CACHE_HASH
92 op->ob_shash = -1;
93#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +000094 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000095 if (str != NULL)
96 memcpy(op->ob_sval, str, size);
97 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 if (size == 0) {
100 nullstring = op;
101 INCREF(op);
102 } else if (size == 1 && str != NULL) {
103 characters[*str & UCHAR_MAX] = op;
104 INCREF(op);
105 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000106#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107 return (object *) op;
108}
109
110object *
111newstringobject(str)
112 char *str;
113{
114 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000116#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
121 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000122 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
128 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000129 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000131#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000134 if (op == NULL)
135 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000136 op->ob_type = &Stringtype;
137 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138#ifdef CACHE_HASH
139 op->ob_shash = -1;
140#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000141 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000142 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000143#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
145 nullstring = op;
146 INCREF(op);
147 } else if (size == 1) {
148 characters[*str & UCHAR_MAX] = op;
149 INCREF(op);
150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152 return (object *) op;
153}
154
Guido van Rossum234f9421993-06-17 12:35:49 +0000155static void
Guido van Rossume5372401993-03-16 12:15:04 +0000156string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000157 object *op;
158{
159 DEL(op);
160}
161
Guido van Rossumd7047b31995-01-02 19:07:15 +0000162int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000163getstringsize(op)
164 register object *op;
165{
166 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000167 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000168 return -1;
169 }
170 return ((stringobject *)op) -> ob_size;
171}
172
173/*const*/ char *
174getstringvalue(op)
175 register object *op;
176{
177 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000178 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 return NULL;
180 }
181 return ((stringobject *)op) -> ob_sval;
182}
183
184/* Methods */
185
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000186static int
Guido van Rossume5372401993-03-16 12:15:04 +0000187string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000188 stringobject *op;
189 FILE *fp;
190 int flags;
191{
192 int i;
193 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000194 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000195 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000196 if (flags & PRINT_RAW) {
197 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000198 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000199 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200
201 /* figure out which quote to use; single is prefered */
202 quote = '\'';
203 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
204 quote = '"';
205
206 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000207 for (i = 0; i < op->ob_size; i++) {
208 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000209 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 fprintf(fp, "\\%c", c);
211 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000212 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000214 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000217 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218}
219
220static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000221string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222 register stringobject *op;
223{
224 /* XXX overflow? */
225 int newsize = 2 + 4 * op->ob_size * sizeof(char);
226 object *v = newsizedstringobject((char *)NULL, newsize);
227 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000228 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 }
230 else {
231 register int i;
232 register char c;
233 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000234 int quote;
235
236 /* figure out which quote to use; single is prefered */
237 quote = '\'';
238 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
239 quote = '"';
240
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000241 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000242 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 for (i = 0; i < op->ob_size; i++) {
244 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000245 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000246 *p++ = '\\', *p++ = c;
247 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000248 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000249 while (*p != '\0')
250 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000251 }
252 else
253 *p++ = c;
254 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 *p = '\0';
257 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000258 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262static int
Guido van Rossume5372401993-03-16 12:15:04 +0000263string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 stringobject *a;
265{
266 return a->ob_size;
267}
268
269static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000270string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271 register stringobject *a;
272 register object *bb;
273{
274 register unsigned int size;
275 register stringobject *op;
276 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000277 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278 return NULL;
279 }
280#define b ((stringobject *)bb)
281 /* Optimize cases with empty left or right operand */
282 if (a->ob_size == 0) {
283 INCREF(bb);
284 return bb;
285 }
286 if (b->ob_size == 0) {
287 INCREF(a);
288 return (object *)a;
289 }
290 size = a->ob_size + b->ob_size;
291 op = (stringobject *)
292 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000293 if (op == NULL)
294 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000295 op->ob_type = &Stringtype;
296 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000297#ifdef CACHE_HASH
298 op->ob_shash = -1;
299#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000300 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000301 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
302 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
303 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304 return (object *) op;
305#undef b
306}
307
308static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000309string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 register stringobject *a;
311 register int n;
312{
313 register int i;
314 register unsigned int size;
315 register stringobject *op;
316 if (n < 0)
317 n = 0;
318 size = a->ob_size * n;
319 if (size == a->ob_size) {
320 INCREF(a);
321 return (object *)a;
322 }
323 op = (stringobject *)
324 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000325 if (op == NULL)
326 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000327 op->ob_type = &Stringtype;
328 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000329#ifdef CACHE_HASH
330 op->ob_shash = -1;
331#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000332 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000333 for (i = 0; i < size; i += a->ob_size)
334 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
335 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 return (object *) op;
337}
338
339/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
340
341static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000342string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 register stringobject *a;
344 register int i, j; /* May be negative! */
345{
346 if (i < 0)
347 i = 0;
348 if (j < 0)
349 j = 0; /* Avoid signed/unsigned bug in next line */
350 if (j > a->ob_size)
351 j = a->ob_size;
352 if (i == 0 && j == a->ob_size) { /* It's the same as a */
353 INCREF(a);
354 return (object *)a;
355 }
356 if (j < i)
357 j = i;
358 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
359}
360
361static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000362string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 stringobject *a;
364 register int i;
365{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000366 int c;
367 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000369 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 return NULL;
371 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000372 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000373 v = (object *) characters[c];
374#ifdef COUNT_ALLOCS
375 if (v != NULL)
376 one_strings++;
377#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000378 if (v == NULL) {
379 v = newsizedstringobject((char *)NULL, 1);
380 if (v == NULL)
381 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000382 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000383 ((stringobject *)v)->ob_sval[0] = c;
384 }
385 INCREF(v);
386 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387}
388
389static int
Guido van Rossume5372401993-03-16 12:15:04 +0000390string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 stringobject *a, *b;
392{
Guido van Rossum253919f1991-02-13 23:18:39 +0000393 int len_a = a->ob_size, len_b = b->ob_size;
394 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000395 int cmp;
396 if (min_len > 0) {
397 cmp = *a->ob_sval - *b->ob_sval;
398 if (cmp == 0)
399 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
400 if (cmp != 0)
401 return cmp;
402 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000403 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404}
405
Guido van Rossum9bfef441993-03-29 10:43:31 +0000406static long
407string_hash(a)
408 stringobject *a;
409{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000410 register int len;
411 register unsigned char *p;
412 register long x;
413
414#ifdef CACHE_HASH
415 if (a->ob_shash != -1)
416 return a->ob_shash;
417#endif
418 len = a->ob_size;
419 p = (unsigned char *) a->ob_sval;
420 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000421 while (--len >= 0)
Guido van Rossum5fe60581995-03-09 12:12:50 +0000422 x = (3*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000423 x ^= a->ob_size;
424 if (x == -1)
425 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000426#ifdef CACHE_HASH
427 a->ob_shash = x;
428#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000429 return x;
430}
431
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000433 (inquiry)string_length, /*sq_length*/
434 (binaryfunc)string_concat, /*sq_concat*/
435 (intargfunc)string_repeat, /*sq_repeat*/
436 (intargfunc)string_item, /*sq_item*/
437 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000438 0, /*sq_ass_item*/
439 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440};
441
442typeobject Stringtype = {
443 OB_HEAD_INIT(&Typetype)
444 0,
445 "string",
446 sizeof(stringobject),
447 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000448 (destructor)string_dealloc, /*tp_dealloc*/
449 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450 0, /*tp_getattr*/
451 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000452 (cmpfunc)string_compare, /*tp_compare*/
453 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 0, /*tp_as_number*/
455 &string_as_sequence, /*tp_as_sequence*/
456 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000457 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458};
459
460void
461joinstring(pv, w)
462 register object **pv;
463 register object *w;
464{
465 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000466 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000468 if (w == NULL || !is_stringobject(*pv)) {
469 DECREF(*pv);
470 *pv = NULL;
471 return;
472 }
Guido van Rossume5372401993-03-16 12:15:04 +0000473 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474 DECREF(*pv);
475 *pv = v;
476}
477
Guido van Rossum013142a1994-08-30 08:19:36 +0000478void
479joinstring_decref(pv, w)
480 register object **pv;
481 register object *w;
482{
483 joinstring(pv, w);
484 XDECREF(w);
485}
486
487
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000488/* The following function breaks the notion that strings are immutable:
489 it changes the size of a string. We get away with this only if there
490 is only one module referencing the object. You can also think of it
491 as creating a new string object and destroying the old one, only
492 more efficiently. In any case, don't use this if the string may
493 already be known to some other part of the code... */
494
495int
496resizestring(pv, newsize)
497 object **pv;
498 int newsize;
499{
Guido van Rossum921842f1990-11-18 17:30:23 +0000500 register object *v;
501 register stringobject *sv;
502 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 if (!is_stringobject(v) || v->ob_refcnt != 1) {
504 *pv = 0;
505 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000506 err_badcall();
507 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000509 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum392ab321990-11-18 17:41:19 +0000510#ifdef REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +0000511 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +0000512#endif
513 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514 *pv = (object *)
515 realloc((char *)v,
516 sizeof(stringobject) + newsize * sizeof(char));
517 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000518 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000519 err_nomem();
520 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000522 NEWREF(*pv);
523 sv = (stringobject *) *pv;
524 sv->ob_size = newsize;
525 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526 return 0;
527}
Guido van Rossume5372401993-03-16 12:15:04 +0000528
529/* Helpers for formatstring */
530
531static object *
532getnextarg(args, arglen, p_argidx)
533 object *args;
534 int arglen;
535 int *p_argidx;
536{
537 int argidx = *p_argidx;
538 if (argidx < arglen) {
539 (*p_argidx)++;
540 if (arglen < 0)
541 return args;
542 else
543 return gettupleitem(args, argidx);
544 }
545 err_setstr(TypeError, "not enough arguments for format string");
546 return NULL;
547}
548
549#define F_LJUST (1<<0)
550#define F_SIGN (1<<1)
551#define F_BLANK (1<<2)
552#define F_ALT (1<<3)
553#define F_ZERO (1<<4)
554
555extern double fabs PROTO((double));
556
557static char *
558formatfloat(flags, prec, type, v)
559 int flags;
560 int prec;
561 int type;
562 object *v;
563{
564 char fmt[20];
565 static char buf[120];
566 double x;
567 if (!getargs(v, "d;float argument required", &x))
568 return NULL;
569 if (prec < 0)
570 prec = 6;
571 if (prec > 50)
572 prec = 50; /* Arbitrary limitation */
573 if (type == 'f' && fabs(x)/1e25 >= 1e25)
574 type = 'g';
575 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
576 sprintf(buf, fmt, x);
577 return buf;
578}
579
580static char *
581formatint(flags, prec, type, v)
582 int flags;
583 int prec;
584 int type;
585 object *v;
586{
587 char fmt[20];
588 static char buf[50];
589 long x;
590 if (!getargs(v, "l;int argument required", &x))
591 return NULL;
592 if (prec < 0)
593 prec = 1;
594 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
595 sprintf(buf, fmt, x);
596 return buf;
597}
598
599static char *
600formatchar(v)
601 object *v;
602{
603 static char buf[2];
604 if (is_stringobject(v)) {
605 if (!getargs(v, "c;%c requires int or char", &buf[0]))
606 return NULL;
607 }
608 else {
609 if (!getargs(v, "b;%c requires int or char", &buf[0]))
610 return NULL;
611 }
612 buf[1] = '\0';
613 return buf;
614}
615
Guido van Rossum013142a1994-08-30 08:19:36 +0000616
Guido van Rossume5372401993-03-16 12:15:04 +0000617/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
618
619object *
620formatstring(format, args)
621 object *format;
622 object *args;
623{
624 char *fmt, *res;
625 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +0000626 int args_owned = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000627 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000628 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000629 if (format == NULL || !is_stringobject(format) || args == NULL) {
630 err_badcall();
631 return NULL;
632 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000633 fmt = getstringvalue(format);
634 fmtcnt = getstringsize(format);
635 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000636 result = newsizedstringobject((char *)NULL, reslen);
637 if (result == NULL)
638 return NULL;
639 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000640 if (is_tupleobject(args)) {
641 arglen = gettuplesize(args);
642 argidx = 0;
643 }
644 else {
645 arglen = -1;
646 argidx = -2;
647 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000648 if (args->ob_type->tp_as_mapping)
649 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000650 while (--fmtcnt >= 0) {
651 if (*fmt != '%') {
652 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000653 rescnt = fmtcnt + 100;
654 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000655 if (resizestring(&result, reslen) < 0)
656 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000657 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000658 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000659 }
660 *res++ = *fmt++;
661 }
662 else {
663 /* Got a format specifier */
664 int flags = 0;
665 char *fmtstart = fmt++;
666 int width = -1;
667 int prec = -1;
668 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000669 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000670 int fill;
671 object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000672 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000673 char *buf;
674 int sign;
675 int len;
Guido van Rossum993952b1996-05-21 22:44:20 +0000676 args_owned = 0;
Guido van Rossum013142a1994-08-30 08:19:36 +0000677 if (*fmt == '(') {
678 char *keystart;
679 int keylen;
680 object *key;
681
682 if (dict == NULL) {
683 err_setstr(TypeError,
684 "format requires a mapping");
685 goto error;
686 }
687 ++fmt;
688 --fmtcnt;
689 keystart = fmt;
690 while (--fmtcnt >= 0 && *fmt != ')')
691 fmt++;
692 keylen = fmt - keystart;
693 ++fmt;
694 if (fmtcnt < 0) {
695 err_setstr(ValueError,
696 "incomplete format key");
697 goto error;
698 }
699 key = newsizedstringobject(keystart, keylen);
700 if (key == NULL)
701 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +0000702 if (args_owned) {
703 DECREF(args);
704 args_owned = 0;
705 }
706 args = PyObject_GetItem(dict, key);
Guido van Rossum013142a1994-08-30 08:19:36 +0000707 DECREF(key);
708 if (args == NULL) {
709 goto error;
710 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000711 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +0000712 arglen = -1;
713 argidx = -2;
714 }
Guido van Rossume5372401993-03-16 12:15:04 +0000715 while (--fmtcnt >= 0) {
716 switch (c = *fmt++) {
717 case '-': flags |= F_LJUST; continue;
718 case '+': flags |= F_SIGN; continue;
719 case ' ': flags |= F_BLANK; continue;
720 case '#': flags |= F_ALT; continue;
721 case '0': flags |= F_ZERO; continue;
722 }
723 break;
724 }
725 if (c == '*') {
726 v = getnextarg(args, arglen, &argidx);
727 if (v == NULL)
728 goto error;
729 if (!is_intobject(v)) {
730 err_setstr(TypeError, "* wants int");
731 goto error;
732 }
733 width = getintvalue(v);
734 if (width < 0)
735 width = 0;
736 if (--fmtcnt >= 0)
737 c = *fmt++;
738 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000739 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000740 width = c - '0';
741 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000742 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000743 if (!isdigit(c))
744 break;
745 if ((width*10) / 10 != width) {
746 err_setstr(ValueError,
747 "width too big");
748 goto error;
749 }
750 width = width*10 + (c - '0');
751 }
752 }
753 if (c == '.') {
754 prec = 0;
755 if (--fmtcnt >= 0)
756 c = *fmt++;
757 if (c == '*') {
758 v = getnextarg(args, arglen, &argidx);
759 if (v == NULL)
760 goto error;
761 if (!is_intobject(v)) {
762 err_setstr(TypeError,
763 "* wants int");
764 goto error;
765 }
766 prec = getintvalue(v);
767 if (prec < 0)
768 prec = 0;
769 if (--fmtcnt >= 0)
770 c = *fmt++;
771 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000772 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000773 prec = c - '0';
774 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000775 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000776 if (!isdigit(c))
777 break;
778 if ((prec*10) / 10 != prec) {
779 err_setstr(ValueError,
780 "prec too big");
781 goto error;
782 }
783 prec = prec*10 + (c - '0');
784 }
785 }
786 } /* prec */
787 if (fmtcnt >= 0) {
788 if (c == 'h' || c == 'l' || c == 'L') {
789 size = c;
790 if (--fmtcnt >= 0)
791 c = *fmt++;
792 }
793 }
794 if (fmtcnt < 0) {
795 err_setstr(ValueError, "incomplete format");
796 goto error;
797 }
798 if (c != '%') {
799 v = getnextarg(args, arglen, &argidx);
800 if (v == NULL)
801 goto error;
802 }
803 sign = 0;
804 fill = ' ';
805 switch (c) {
806 case '%':
807 buf = "%";
808 len = 1;
809 break;
810 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000811 temp = strobject(v);
812 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000813 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000814 buf = getstringvalue(temp);
815 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000816 if (prec >= 0 && len > prec)
817 len = prec;
818 break;
819 case 'i':
820 case 'd':
821 case 'u':
822 case 'o':
823 case 'x':
824 case 'X':
825 if (c == 'i')
826 c = 'd';
827 buf = formatint(flags, prec, c, v);
828 if (buf == NULL)
829 goto error;
830 len = strlen(buf);
831 sign = (c == 'd');
832 if (flags&F_ZERO)
833 fill = '0';
834 break;
835 case 'e':
836 case 'E':
837 case 'f':
838 case 'g':
839 case 'G':
840 buf = formatfloat(flags, prec, c, v);
841 if (buf == NULL)
842 goto error;
843 len = strlen(buf);
844 sign = 1;
845 if (flags&F_ZERO)
846 fill = '0';
847 break;
848 case 'c':
849 buf = formatchar(v);
850 if (buf == NULL)
851 goto error;
Guido van Rossum6938a291993-11-11 14:51:57 +0000852 len = 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000853 break;
854 default:
855 err_setstr(ValueError,
856 "unsupported format character");
857 goto error;
858 }
859 if (sign) {
860 if (*buf == '-' || *buf == '+') {
861 sign = *buf++;
862 len--;
863 }
864 else if (flags & F_SIGN)
865 sign = '+';
866 else if (flags & F_BLANK)
867 sign = ' ';
868 else
869 sign = '\0';
870 }
871 if (width < len)
872 width = len;
873 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000874 reslen -= rescnt;
875 rescnt = width + fmtcnt + 100;
876 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000877 if (resizestring(&result, reslen) < 0)
878 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000879 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000880 }
881 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000882 if (fill != ' ')
883 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000884 rescnt--;
885 if (width > len)
886 width--;
887 }
888 if (width > len && !(flags&F_LJUST)) {
889 do {
890 --rescnt;
891 *res++ = fill;
892 } while (--width > len);
893 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000894 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000895 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000896 memcpy(res, buf, len);
897 res += len;
898 rescnt -= len;
899 while (--width >= len) {
900 --rescnt;
901 *res++ = ' ';
902 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000903 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000904 err_setstr(TypeError,
905 "not all arguments converted");
906 goto error;
907 }
908 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000909 } /* '%' */
910 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +0000911 if (argidx < arglen && !dict) {
Guido van Rossume5372401993-03-16 12:15:04 +0000912 err_setstr(TypeError, "not all arguments converted");
913 goto error;
914 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000915 if (args_owned)
916 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000917 resizestring(&result, reslen - rescnt);
918 return result;
919 error:
920 DECREF(result);
Guido van Rossum993952b1996-05-21 22:44:20 +0000921 if (args_owned)
922 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000923 return NULL;
924}