blob: 0d03a3ba2b4f8c3651004daa9482b477f57b93e7 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossume5372401993-03-16 12:15:04 +00002Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
3Amsterdam, The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* String object implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029#ifdef COUNT_ALLOCS
30int null_strings, one_strings;
31#endif
32
33#ifdef __STDC__
34#include <limits.h>
35#else
36#ifndef UCHAR_MAX
37#define UCHAR_MAX 255
38#endif
39#endif
40
41static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000042#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045
/*
   newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   newstringobject() this is always the case; for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000061object *
62newsizedstringobject(str, size)
63 char *str;
64 int size;
65{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000067#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 if (size == 0 && (op = nullstring) != NULL) {
69#ifdef COUNT_ALLOCS
70 null_strings++;
71#endif
72 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000073 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
75 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
76#ifdef COUNT_ALLOCS
77 one_strings++;
78#endif
79 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000080 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (op == NULL)
86 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 op->ob_type = &Stringtype;
88 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089#ifdef CACHE_HASH
90 op->ob_shash = -1;
91#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +000092 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
98 nullstring = op;
99 INCREF(op);
100 } else if (size == 1 && str != NULL) {
101 characters[*str & UCHAR_MAX] = op;
102 INCREF(op);
103 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000104#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105 return (object *) op;
106}
107
108object *
109newstringobject(str)
110 char *str;
111{
112 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000114#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 if (size == 0 && (op = nullstring) != NULL) {
116#ifdef COUNT_ALLOCS
117 null_strings++;
118#endif
119 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000120 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
123#ifdef COUNT_ALLOCS
124 one_strings++;
125#endif
126 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000127 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 if (op == NULL)
133 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000134 op->ob_type = &Stringtype;
135 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136#ifdef CACHE_HASH
137 op->ob_shash = -1;
138#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000139 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000140 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000141#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
143 nullstring = op;
144 INCREF(op);
145 } else if (size == 1) {
146 characters[*str & UCHAR_MAX] = op;
147 INCREF(op);
148 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000149#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150 return (object *) op;
151}
152
Guido van Rossum234f9421993-06-17 12:35:49 +0000153static void
Guido van Rossume5372401993-03-16 12:15:04 +0000154string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000155 object *op;
156{
157 DEL(op);
158}
159
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000160unsigned int
161getstringsize(op)
162 register object *op;
163{
164 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000165 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000166 return -1;
167 }
168 return ((stringobject *)op) -> ob_size;
169}
170
171/*const*/ char *
172getstringvalue(op)
173 register object *op;
174{
175 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000176 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177 return NULL;
178 }
179 return ((stringobject *)op) -> ob_sval;
180}
181
182/* Methods */
183
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000184static int
Guido van Rossume5372401993-03-16 12:15:04 +0000185string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000186 stringobject *op;
187 FILE *fp;
188 int flags;
189{
190 int i;
191 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000192 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000193 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000194 if (flags & PRINT_RAW) {
195 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000196 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000198
199 /* figure out which quote to use; single is prefered */
200 quote = '\'';
201 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
202 quote = '"';
203
204 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 for (i = 0; i < op->ob_size; i++) {
206 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000207 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000208 fprintf(fp, "\\%c", c);
209 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000211 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000212 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000214 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000215 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000216}
217
218static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000219string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220 register stringobject *op;
221{
222 /* XXX overflow? */
223 int newsize = 2 + 4 * op->ob_size * sizeof(char);
224 object *v = newsizedstringobject((char *)NULL, newsize);
225 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000226 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000227 }
228 else {
229 register int i;
230 register char c;
231 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 int quote;
233
234 /* figure out which quote to use; single is prefered */
235 quote = '\'';
236 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
237 quote = '"';
238
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000240 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000241 for (i = 0; i < op->ob_size; i++) {
242 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000243 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000244 *p++ = '\\', *p++ = c;
245 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000246 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 while (*p != '\0')
248 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000249 }
250 else
251 *p++ = c;
252 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000253 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254 *p = '\0';
255 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000256 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000258}
259
260static int
Guido van Rossume5372401993-03-16 12:15:04 +0000261string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 stringobject *a;
263{
264 return a->ob_size;
265}
266
267static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000268string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269 register stringobject *a;
270 register object *bb;
271{
272 register unsigned int size;
273 register stringobject *op;
274 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 return NULL;
277 }
278#define b ((stringobject *)bb)
279 /* Optimize cases with empty left or right operand */
280 if (a->ob_size == 0) {
281 INCREF(bb);
282 return bb;
283 }
284 if (b->ob_size == 0) {
285 INCREF(a);
286 return (object *)a;
287 }
288 size = a->ob_size + b->ob_size;
289 op = (stringobject *)
290 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000291 if (op == NULL)
292 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000293 op->ob_type = &Stringtype;
294 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000295#ifdef CACHE_HASH
296 op->ob_shash = -1;
297#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000298 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000299 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
300 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
301 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000302 return (object *) op;
303#undef b
304}
305
306static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000307string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000308 register stringobject *a;
309 register int n;
310{
311 register int i;
312 register unsigned int size;
313 register stringobject *op;
314 if (n < 0)
315 n = 0;
316 size = a->ob_size * n;
317 if (size == a->ob_size) {
318 INCREF(a);
319 return (object *)a;
320 }
321 op = (stringobject *)
322 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000323 if (op == NULL)
324 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000325 op->ob_type = &Stringtype;
326 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000327#ifdef CACHE_HASH
328 op->ob_shash = -1;
329#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000330 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000331 for (i = 0; i < size; i += a->ob_size)
332 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
333 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 return (object *) op;
335}
336
337/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
338
339static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000340string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341 register stringobject *a;
342 register int i, j; /* May be negative! */
343{
344 if (i < 0)
345 i = 0;
346 if (j < 0)
347 j = 0; /* Avoid signed/unsigned bug in next line */
348 if (j > a->ob_size)
349 j = a->ob_size;
350 if (i == 0 && j == a->ob_size) { /* It's the same as a */
351 INCREF(a);
352 return (object *)a;
353 }
354 if (j < i)
355 j = i;
356 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
357}
358
359static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000360string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 stringobject *a;
362 register int i;
363{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000364 int c;
365 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000366 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000367 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 return NULL;
369 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000370 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000371 v = (object *) characters[c];
372#ifdef COUNT_ALLOCS
373 if (v != NULL)
374 one_strings++;
375#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000376 if (v == NULL) {
377 v = newsizedstringobject((char *)NULL, 1);
378 if (v == NULL)
379 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000380 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000381 ((stringobject *)v)->ob_sval[0] = c;
382 }
383 INCREF(v);
384 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000385}
386
387static int
Guido van Rossume5372401993-03-16 12:15:04 +0000388string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000389 stringobject *a, *b;
390{
Guido van Rossum253919f1991-02-13 23:18:39 +0000391 int len_a = a->ob_size, len_b = b->ob_size;
392 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000393 int cmp;
394 if (min_len > 0) {
395 cmp = *a->ob_sval - *b->ob_sval;
396 if (cmp == 0)
397 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
398 if (cmp != 0)
399 return cmp;
400 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000401 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402}
403
Guido van Rossum9bfef441993-03-29 10:43:31 +0000404static long
405string_hash(a)
406 stringobject *a;
407{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000408 register int len;
409 register unsigned char *p;
410 register long x;
411
412#ifdef CACHE_HASH
413 if (a->ob_shash != -1)
414 return a->ob_shash;
415#endif
416 len = a->ob_size;
417 p = (unsigned char *) a->ob_sval;
418 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000419 while (--len >= 0)
420 x = (x + x + x) ^ *p++;
421 x ^= a->ob_size;
422 if (x == -1)
423 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000424#ifdef CACHE_HASH
425 a->ob_shash = x;
426#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000427 return x;
428}
429
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000430static sequence_methods string_as_sequence = {
Guido van Rossume5372401993-03-16 12:15:04 +0000431 string_length, /*sq_length*/
432 string_concat, /*sq_concat*/
433 string_repeat, /*sq_repeat*/
434 string_item, /*sq_item*/
435 string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000436 0, /*sq_ass_item*/
437 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000438};
439
440typeobject Stringtype = {
441 OB_HEAD_INIT(&Typetype)
442 0,
443 "string",
444 sizeof(stringobject),
445 sizeof(char),
Guido van Rossume5372401993-03-16 12:15:04 +0000446 string_dealloc, /*tp_dealloc*/
447 string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000448 0, /*tp_getattr*/
449 0, /*tp_setattr*/
Guido van Rossume5372401993-03-16 12:15:04 +0000450 string_compare, /*tp_compare*/
451 string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452 0, /*tp_as_number*/
453 &string_as_sequence, /*tp_as_sequence*/
454 0, /*tp_as_mapping*/
Guido van Rossum9bfef441993-03-29 10:43:31 +0000455 string_hash, /*tp_hash*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000456};
457
458void
459joinstring(pv, w)
460 register object **pv;
461 register object *w;
462{
463 register object *v;
464 if (*pv == NULL || w == NULL || !is_stringobject(*pv))
465 return;
Guido van Rossume5372401993-03-16 12:15:04 +0000466 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 DECREF(*pv);
468 *pv = v;
469}
470
471/* The following function breaks the notion that strings are immutable:
472 it changes the size of a string. We get away with this only if there
473 is only one module referencing the object. You can also think of it
474 as creating a new string object and destroying the old one, only
475 more efficiently. In any case, don't use this if the string may
476 already be known to some other part of the code... */
477
478int
479resizestring(pv, newsize)
480 object **pv;
481 int newsize;
482{
Guido van Rossum921842f1990-11-18 17:30:23 +0000483 register object *v;
484 register stringobject *sv;
485 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000486 if (!is_stringobject(v) || v->ob_refcnt != 1) {
487 *pv = 0;
488 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000489 err_badcall();
490 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000491 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000492 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum392ab321990-11-18 17:41:19 +0000493#ifdef REF_DEBUG
Guido van Rossum921842f1990-11-18 17:30:23 +0000494 --ref_total;
495#endif
496 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000497 *pv = (object *)
498 realloc((char *)v,
499 sizeof(stringobject) + newsize * sizeof(char));
500 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000501 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000502 err_nomem();
503 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000505 NEWREF(*pv);
506 sv = (stringobject *) *pv;
507 sv->ob_size = newsize;
508 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509 return 0;
510}
Guido van Rossume5372401993-03-16 12:15:04 +0000511
512/* Helpers for formatstring */
513
514static object *
515getnextarg(args, arglen, p_argidx)
516 object *args;
517 int arglen;
518 int *p_argidx;
519{
520 int argidx = *p_argidx;
521 if (argidx < arglen) {
522 (*p_argidx)++;
523 if (arglen < 0)
524 return args;
525 else
526 return gettupleitem(args, argidx);
527 }
528 err_setstr(TypeError, "not enough arguments for format string");
529 return NULL;
530}
531
532#define F_LJUST (1<<0)
533#define F_SIGN (1<<1)
534#define F_BLANK (1<<2)
535#define F_ALT (1<<3)
536#define F_ZERO (1<<4)
537
538extern double fabs PROTO((double));
539
540static char *
541formatfloat(flags, prec, type, v)
542 int flags;
543 int prec;
544 int type;
545 object *v;
546{
547 char fmt[20];
548 static char buf[120];
549 double x;
550 if (!getargs(v, "d;float argument required", &x))
551 return NULL;
552 if (prec < 0)
553 prec = 6;
554 if (prec > 50)
555 prec = 50; /* Arbitrary limitation */
556 if (type == 'f' && fabs(x)/1e25 >= 1e25)
557 type = 'g';
558 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
559 sprintf(buf, fmt, x);
560 return buf;
561}
562
563static char *
564formatint(flags, prec, type, v)
565 int flags;
566 int prec;
567 int type;
568 object *v;
569{
570 char fmt[20];
571 static char buf[50];
572 long x;
573 if (!getargs(v, "l;int argument required", &x))
574 return NULL;
575 if (prec < 0)
576 prec = 1;
577 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
578 sprintf(buf, fmt, x);
579 return buf;
580}
581
582static char *
583formatchar(v)
584 object *v;
585{
586 static char buf[2];
587 if (is_stringobject(v)) {
588 if (!getargs(v, "c;%c requires int or char", &buf[0]))
589 return NULL;
590 }
591 else {
592 if (!getargs(v, "b;%c requires int or char", &buf[0]))
593 return NULL;
594 }
595 buf[1] = '\0';
596 return buf;
597}
598
599/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
600
601object *
602formatstring(format, args)
603 object *format;
604 object *args;
605{
606 char *fmt, *res;
607 int fmtcnt, rescnt, reslen, arglen, argidx;
608 object *result;
609 if (format == NULL || !is_stringobject(format) || args == NULL) {
610 err_badcall();
611 return NULL;
612 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000613 fmt = getstringvalue(format);
614 fmtcnt = getstringsize(format);
615 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000616 result = newsizedstringobject((char *)NULL, reslen);
617 if (result == NULL)
618 return NULL;
619 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000620 if (is_tupleobject(args)) {
621 arglen = gettuplesize(args);
622 argidx = 0;
623 }
624 else {
625 arglen = -1;
626 argidx = -2;
627 }
628 while (--fmtcnt >= 0) {
629 if (*fmt != '%') {
630 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000631 rescnt = fmtcnt + 100;
632 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000633 if (resizestring(&result, reslen) < 0)
634 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000635 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000636 }
637 *res++ = *fmt++;
638 }
639 else {
640 /* Got a format specifier */
641 int flags = 0;
642 char *fmtstart = fmt++;
643 int width = -1;
644 int prec = -1;
645 int size = 0;
646 int c;
647 int fill;
648 object *v;
649 char *buf;
650 int sign;
651 int len;
652 while (--fmtcnt >= 0) {
653 switch (c = *fmt++) {
654 case '-': flags |= F_LJUST; continue;
655 case '+': flags |= F_SIGN; continue;
656 case ' ': flags |= F_BLANK; continue;
657 case '#': flags |= F_ALT; continue;
658 case '0': flags |= F_ZERO; continue;
659 }
660 break;
661 }
662 if (c == '*') {
663 v = getnextarg(args, arglen, &argidx);
664 if (v == NULL)
665 goto error;
666 if (!is_intobject(v)) {
667 err_setstr(TypeError, "* wants int");
668 goto error;
669 }
670 width = getintvalue(v);
671 if (width < 0)
672 width = 0;
673 if (--fmtcnt >= 0)
674 c = *fmt++;
675 }
676 else if (isdigit(c)) {
677 width = c - '0';
678 while (--fmtcnt >= 0) {
679 c = *fmt++;
680 if (!isdigit(c))
681 break;
682 if ((width*10) / 10 != width) {
683 err_setstr(ValueError,
684 "width too big");
685 goto error;
686 }
687 width = width*10 + (c - '0');
688 }
689 }
690 if (c == '.') {
691 prec = 0;
692 if (--fmtcnt >= 0)
693 c = *fmt++;
694 if (c == '*') {
695 v = getnextarg(args, arglen, &argidx);
696 if (v == NULL)
697 goto error;
698 if (!is_intobject(v)) {
699 err_setstr(TypeError,
700 "* wants int");
701 goto error;
702 }
703 prec = getintvalue(v);
704 if (prec < 0)
705 prec = 0;
706 if (--fmtcnt >= 0)
707 c = *fmt++;
708 }
709 else if (isdigit(c)) {
710 prec = c - '0';
711 while (--fmtcnt >= 0) {
712 c = *fmt++;
713 if (!isdigit(c))
714 break;
715 if ((prec*10) / 10 != prec) {
716 err_setstr(ValueError,
717 "prec too big");
718 goto error;
719 }
720 prec = prec*10 + (c - '0');
721 }
722 }
723 } /* prec */
724 if (fmtcnt >= 0) {
725 if (c == 'h' || c == 'l' || c == 'L') {
726 size = c;
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 }
731 if (fmtcnt < 0) {
732 err_setstr(ValueError, "incomplete format");
733 goto error;
734 }
735 if (c != '%') {
736 v = getnextarg(args, arglen, &argidx);
737 if (v == NULL)
738 goto error;
739 }
740 sign = 0;
741 fill = ' ';
742 switch (c) {
743 case '%':
744 buf = "%";
745 len = 1;
746 break;
747 case 's':
748 if (!is_stringobject(v)) {
749 err_setstr(TypeError,
750 "%s wants string");
751 goto error;
752 }
753 buf = getstringvalue(v);
754 len = getstringsize(v);
755 if (prec >= 0 && len > prec)
756 len = prec;
757 break;
758 case 'i':
759 case 'd':
760 case 'u':
761 case 'o':
762 case 'x':
763 case 'X':
764 if (c == 'i')
765 c = 'd';
766 buf = formatint(flags, prec, c, v);
767 if (buf == NULL)
768 goto error;
769 len = strlen(buf);
770 sign = (c == 'd');
771 if (flags&F_ZERO)
772 fill = '0';
773 break;
774 case 'e':
775 case 'E':
776 case 'f':
777 case 'g':
778 case 'G':
779 buf = formatfloat(flags, prec, c, v);
780 if (buf == NULL)
781 goto error;
782 len = strlen(buf);
783 sign = 1;
784 if (flags&F_ZERO)
785 fill = '0';
786 break;
787 case 'c':
788 buf = formatchar(v);
789 if (buf == NULL)
790 goto error;
791 len = strlen(buf);
792 break;
793 default:
794 err_setstr(ValueError,
795 "unsupported format character");
796 goto error;
797 }
Guido van Rossum234f9421993-06-17 12:35:49 +0000798 /* XXX There's a bug somewhere here so that
799 XXX '%4d'%-1 yields '- 1' ... */
Guido van Rossume5372401993-03-16 12:15:04 +0000800 if (sign) {
801 if (*buf == '-' || *buf == '+') {
802 sign = *buf++;
803 len--;
804 }
805 else if (flags & F_SIGN)
806 sign = '+';
807 else if (flags & F_BLANK)
808 sign = ' ';
809 else
810 sign = '\0';
811 }
812 if (width < len)
813 width = len;
814 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000815 reslen -= rescnt;
816 rescnt = width + fmtcnt + 100;
817 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000818 if (resizestring(&result, reslen) < 0)
819 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000820 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000821 }
822 if (sign) {
823 *res++ = sign;
824 rescnt--;
825 if (width > len)
826 width--;
827 }
828 if (width > len && !(flags&F_LJUST)) {
829 do {
830 --rescnt;
831 *res++ = fill;
832 } while (--width > len);
833 }
834 memcpy(res, buf, len);
835 res += len;
836 rescnt -= len;
837 while (--width >= len) {
838 --rescnt;
839 *res++ = ' ';
840 }
841 } /* '%' */
842 } /* until end */
843 if (argidx < arglen) {
844 err_setstr(TypeError, "not all arguments converted");
845 goto error;
846 }
847 resizestring(&result, reslen - rescnt);
848 return result;
849 error:
850 DECREF(result);
851 return NULL;
852}