blob: 61863b63d08b83db51d9b1fa74e4c2a587db4bb1 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossume5372401993-03-16 12:15:04 +00002Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
3Amsterdam, The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* String object implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029#ifdef COUNT_ALLOCS
30int null_strings, one_strings;
31#endif
32
33#ifdef __STDC__
34#include <limits.h>
35#else
36#ifndef UCHAR_MAX
37#define UCHAR_MAX 255
38#endif
39#endif
40
41static stringobject *characters[UCHAR_MAX + 1];
42static stringobject *nullstring;
43
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   newstringobject() this is always the case; for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059object *
60newsizedstringobject(str, size)
61 char *str;
62 int size;
63{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000064 register stringobject *op;
65 if (size == 0 && (op = nullstring) != NULL) {
66#ifdef COUNT_ALLOCS
67 null_strings++;
68#endif
69 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000070 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
72 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
73#ifdef COUNT_ALLOCS
74 one_strings++;
75#endif
76 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000077 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
79 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000080 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (op == NULL)
82 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_type = &Stringtype;
84 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085#ifdef CACHE_HASH
86 op->ob_shash = -1;
87#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +000088 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 if (str != NULL)
90 memcpy(op->ob_sval, str, size);
91 op->ob_sval[size] = '\0';
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 if (size == 0) {
93 nullstring = op;
94 INCREF(op);
95 } else if (size == 1 && str != NULL) {
96 characters[*str & UCHAR_MAX] = op;
97 INCREF(op);
98 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099 return (object *) op;
100}
101
102object *
103newstringobject(str)
104 char *str;
105{
106 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 register stringobject *op;
108 if (size == 0 && (op = nullstring) != NULL) {
109#ifdef COUNT_ALLOCS
110 null_strings++;
111#endif
112 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000113 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 }
115 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
116#ifdef COUNT_ALLOCS
117 one_strings++;
118#endif
119 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000120 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000121 }
122 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
125 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000126 op->ob_type = &Stringtype;
127 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000128#ifdef CACHE_HASH
129 op->ob_shash = -1;
130#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000131 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000132 strcpy(op->ob_sval, str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
134 nullstring = op;
135 INCREF(op);
136 } else if (size == 1) {
137 characters[*str & UCHAR_MAX] = op;
138 INCREF(op);
139 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 return (object *) op;
141}
142
Guido van Rossum234f9421993-06-17 12:35:49 +0000143static void
Guido van Rossume5372401993-03-16 12:15:04 +0000144string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000145 object *op;
146{
147 DEL(op);
148}
149
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150unsigned int
151getstringsize(op)
152 register object *op;
153{
154 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000155 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156 return -1;
157 }
158 return ((stringobject *)op) -> ob_size;
159}
160
161/*const*/ char *
162getstringvalue(op)
163 register object *op;
164{
165 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000166 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000167 return NULL;
168 }
169 return ((stringobject *)op) -> ob_sval;
170}
171
172/* Methods */
173
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000174static int
Guido van Rossume5372401993-03-16 12:15:04 +0000175string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176 stringobject *op;
177 FILE *fp;
178 int flags;
179{
180 int i;
181 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000182 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000183 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 if (flags & PRINT_RAW) {
185 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000186 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000188
189 /* figure out which quote to use; single is prefered */
190 quote = '\'';
191 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
192 quote = '"';
193
194 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 for (i = 0; i < op->ob_size; i++) {
196 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000197 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198 fprintf(fp, "\\%c", c);
199 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000202 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000204 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000205 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000206}
207
208static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000209string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 register stringobject *op;
211{
212 /* XXX overflow? */
213 int newsize = 2 + 4 * op->ob_size * sizeof(char);
214 object *v = newsizedstringobject((char *)NULL, newsize);
215 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000216 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000217 }
218 else {
219 register int i;
220 register char c;
221 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000222 int quote;
223
224 /* figure out which quote to use; single is prefered */
225 quote = '\'';
226 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
227 quote = '"';
228
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 for (i = 0; i < op->ob_size; i++) {
232 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000233 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234 *p++ = '\\', *p++ = c;
235 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000236 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000237 while (*p != '\0')
238 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239 }
240 else
241 *p++ = c;
242 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000243 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000244 *p = '\0';
245 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000246 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000248}
249
250static int
Guido van Rossume5372401993-03-16 12:15:04 +0000251string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000252 stringobject *a;
253{
254 return a->ob_size;
255}
256
257static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000258string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 register stringobject *a;
260 register object *bb;
261{
262 register unsigned int size;
263 register stringobject *op;
264 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000265 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266 return NULL;
267 }
268#define b ((stringobject *)bb)
269 /* Optimize cases with empty left or right operand */
270 if (a->ob_size == 0) {
271 INCREF(bb);
272 return bb;
273 }
274 if (b->ob_size == 0) {
275 INCREF(a);
276 return (object *)a;
277 }
278 size = a->ob_size + b->ob_size;
279 op = (stringobject *)
280 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000281 if (op == NULL)
282 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000283 op->ob_type = &Stringtype;
284 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000285#ifdef CACHE_HASH
286 op->ob_shash = -1;
287#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000288 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000289 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
290 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
291 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 return (object *) op;
293#undef b
294}
295
296static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000297string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 register stringobject *a;
299 register int n;
300{
301 register int i;
302 register unsigned int size;
303 register stringobject *op;
304 if (n < 0)
305 n = 0;
306 size = a->ob_size * n;
307 if (size == a->ob_size) {
308 INCREF(a);
309 return (object *)a;
310 }
311 op = (stringobject *)
312 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000313 if (op == NULL)
314 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000315 op->ob_type = &Stringtype;
316 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000317#ifdef CACHE_HASH
318 op->ob_shash = -1;
319#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000320 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000321 for (i = 0; i < size; i += a->ob_size)
322 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
323 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 return (object *) op;
325}
326
327/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
328
329static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000330string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 register stringobject *a;
332 register int i, j; /* May be negative! */
333{
334 if (i < 0)
335 i = 0;
336 if (j < 0)
337 j = 0; /* Avoid signed/unsigned bug in next line */
338 if (j > a->ob_size)
339 j = a->ob_size;
340 if (i == 0 && j == a->ob_size) { /* It's the same as a */
341 INCREF(a);
342 return (object *)a;
343 }
344 if (j < i)
345 j = i;
346 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
347}
348
349static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000350string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351 stringobject *a;
352 register int i;
353{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000354 int c;
355 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000356 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000357 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 return NULL;
359 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000360 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000361 v = (object *) characters[c];
362#ifdef COUNT_ALLOCS
363 if (v != NULL)
364 one_strings++;
365#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000366 if (v == NULL) {
367 v = newsizedstringobject((char *)NULL, 1);
368 if (v == NULL)
369 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000370 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000371 ((stringobject *)v)->ob_sval[0] = c;
372 }
373 INCREF(v);
374 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000375}
376
377static int
Guido van Rossume5372401993-03-16 12:15:04 +0000378string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 stringobject *a, *b;
380{
Guido van Rossum253919f1991-02-13 23:18:39 +0000381 int len_a = a->ob_size, len_b = b->ob_size;
382 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000383 int cmp;
384 if (min_len > 0) {
385 cmp = *a->ob_sval - *b->ob_sval;
386 if (cmp == 0)
387 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
388 if (cmp != 0)
389 return cmp;
390 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000391 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392}
393
Guido van Rossum9bfef441993-03-29 10:43:31 +0000394static long
395string_hash(a)
396 stringobject *a;
397{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000398 register int len;
399 register unsigned char *p;
400 register long x;
401
402#ifdef CACHE_HASH
403 if (a->ob_shash != -1)
404 return a->ob_shash;
405#endif
406 len = a->ob_size;
407 p = (unsigned char *) a->ob_sval;
408 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000409 while (--len >= 0)
410 x = (x + x + x) ^ *p++;
411 x ^= a->ob_size;
412 if (x == -1)
413 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000414#ifdef CACHE_HASH
415 a->ob_shash = x;
416#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000417 return x;
418}
419
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000420static sequence_methods string_as_sequence = {
Guido van Rossume5372401993-03-16 12:15:04 +0000421 string_length, /*sq_length*/
422 string_concat, /*sq_concat*/
423 string_repeat, /*sq_repeat*/
424 string_item, /*sq_item*/
425 string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000426 0, /*sq_ass_item*/
427 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000428};
429
430typeobject Stringtype = {
431 OB_HEAD_INIT(&Typetype)
432 0,
433 "string",
434 sizeof(stringobject),
435 sizeof(char),
Guido van Rossume5372401993-03-16 12:15:04 +0000436 string_dealloc, /*tp_dealloc*/
437 string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000438 0, /*tp_getattr*/
439 0, /*tp_setattr*/
Guido van Rossume5372401993-03-16 12:15:04 +0000440 string_compare, /*tp_compare*/
441 string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000442 0, /*tp_as_number*/
443 &string_as_sequence, /*tp_as_sequence*/
444 0, /*tp_as_mapping*/
Guido van Rossum9bfef441993-03-29 10:43:31 +0000445 string_hash, /*tp_hash*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446};
447
448void
449joinstring(pv, w)
450 register object **pv;
451 register object *w;
452{
453 register object *v;
454 if (*pv == NULL || w == NULL || !is_stringobject(*pv))
455 return;
Guido van Rossume5372401993-03-16 12:15:04 +0000456 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000457 DECREF(*pv);
458 *pv = v;
459}
460
461/* The following function breaks the notion that strings are immutable:
462 it changes the size of a string. We get away with this only if there
463 is only one module referencing the object. You can also think of it
464 as creating a new string object and destroying the old one, only
465 more efficiently. In any case, don't use this if the string may
466 already be known to some other part of the code... */
467
468int
469resizestring(pv, newsize)
470 object **pv;
471 int newsize;
472{
Guido van Rossum921842f1990-11-18 17:30:23 +0000473 register object *v;
474 register stringobject *sv;
475 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476 if (!is_stringobject(v) || v->ob_refcnt != 1) {
477 *pv = 0;
478 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000479 err_badcall();
480 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000482 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum392ab321990-11-18 17:41:19 +0000483#ifdef REF_DEBUG
Guido van Rossum921842f1990-11-18 17:30:23 +0000484 --ref_total;
485#endif
486 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000487 *pv = (object *)
488 realloc((char *)v,
489 sizeof(stringobject) + newsize * sizeof(char));
490 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000491 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000492 err_nomem();
493 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000494 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000495 NEWREF(*pv);
496 sv = (stringobject *) *pv;
497 sv->ob_size = newsize;
498 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000499 return 0;
500}
Guido van Rossume5372401993-03-16 12:15:04 +0000501
502/* Helpers for formatstring */
503
504static object *
505getnextarg(args, arglen, p_argidx)
506 object *args;
507 int arglen;
508 int *p_argidx;
509{
510 int argidx = *p_argidx;
511 if (argidx < arglen) {
512 (*p_argidx)++;
513 if (arglen < 0)
514 return args;
515 else
516 return gettupleitem(args, argidx);
517 }
518 err_setstr(TypeError, "not enough arguments for format string");
519 return NULL;
520}
521
/* Flag bits collected by formatstring() from the characters that
   follow '%' in a conversion specification. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
527
528extern double fabs PROTO((double));
529
530static char *
531formatfloat(flags, prec, type, v)
532 int flags;
533 int prec;
534 int type;
535 object *v;
536{
537 char fmt[20];
538 static char buf[120];
539 double x;
540 if (!getargs(v, "d;float argument required", &x))
541 return NULL;
542 if (prec < 0)
543 prec = 6;
544 if (prec > 50)
545 prec = 50; /* Arbitrary limitation */
546 if (type == 'f' && fabs(x)/1e25 >= 1e25)
547 type = 'g';
548 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
549 sprintf(buf, fmt, x);
550 return buf;
551}
552
553static char *
554formatint(flags, prec, type, v)
555 int flags;
556 int prec;
557 int type;
558 object *v;
559{
560 char fmt[20];
561 static char buf[50];
562 long x;
563 if (!getargs(v, "l;int argument required", &x))
564 return NULL;
565 if (prec < 0)
566 prec = 1;
567 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
568 sprintf(buf, fmt, x);
569 return buf;
570}
571
572static char *
573formatchar(v)
574 object *v;
575{
576 static char buf[2];
577 if (is_stringobject(v)) {
578 if (!getargs(v, "c;%c requires int or char", &buf[0]))
579 return NULL;
580 }
581 else {
582 if (!getargs(v, "b;%c requires int or char", &buf[0]))
583 return NULL;
584 }
585 buf[1] = '\0';
586 return buf;
587}
588
589/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
590
591object *
592formatstring(format, args)
593 object *format;
594 object *args;
595{
596 char *fmt, *res;
597 int fmtcnt, rescnt, reslen, arglen, argidx;
598 object *result;
599 if (format == NULL || !is_stringobject(format) || args == NULL) {
600 err_badcall();
601 return NULL;
602 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000603 fmt = getstringvalue(format);
604 fmtcnt = getstringsize(format);
605 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000606 result = newsizedstringobject((char *)NULL, reslen);
607 if (result == NULL)
608 return NULL;
609 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000610 if (is_tupleobject(args)) {
611 arglen = gettuplesize(args);
612 argidx = 0;
613 }
614 else {
615 arglen = -1;
616 argidx = -2;
617 }
618 while (--fmtcnt >= 0) {
619 if (*fmt != '%') {
620 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000621 rescnt = fmtcnt + 100;
622 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000623 if (resizestring(&result, reslen) < 0)
624 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000625 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000626 }
627 *res++ = *fmt++;
628 }
629 else {
630 /* Got a format specifier */
631 int flags = 0;
632 char *fmtstart = fmt++;
633 int width = -1;
634 int prec = -1;
635 int size = 0;
636 int c;
637 int fill;
638 object *v;
639 char *buf;
640 int sign;
641 int len;
642 while (--fmtcnt >= 0) {
643 switch (c = *fmt++) {
644 case '-': flags |= F_LJUST; continue;
645 case '+': flags |= F_SIGN; continue;
646 case ' ': flags |= F_BLANK; continue;
647 case '#': flags |= F_ALT; continue;
648 case '0': flags |= F_ZERO; continue;
649 }
650 break;
651 }
652 if (c == '*') {
653 v = getnextarg(args, arglen, &argidx);
654 if (v == NULL)
655 goto error;
656 if (!is_intobject(v)) {
657 err_setstr(TypeError, "* wants int");
658 goto error;
659 }
660 width = getintvalue(v);
661 if (width < 0)
662 width = 0;
663 if (--fmtcnt >= 0)
664 c = *fmt++;
665 }
666 else if (isdigit(c)) {
667 width = c - '0';
668 while (--fmtcnt >= 0) {
669 c = *fmt++;
670 if (!isdigit(c))
671 break;
672 if ((width*10) / 10 != width) {
673 err_setstr(ValueError,
674 "width too big");
675 goto error;
676 }
677 width = width*10 + (c - '0');
678 }
679 }
680 if (c == '.') {
681 prec = 0;
682 if (--fmtcnt >= 0)
683 c = *fmt++;
684 if (c == '*') {
685 v = getnextarg(args, arglen, &argidx);
686 if (v == NULL)
687 goto error;
688 if (!is_intobject(v)) {
689 err_setstr(TypeError,
690 "* wants int");
691 goto error;
692 }
693 prec = getintvalue(v);
694 if (prec < 0)
695 prec = 0;
696 if (--fmtcnt >= 0)
697 c = *fmt++;
698 }
699 else if (isdigit(c)) {
700 prec = c - '0';
701 while (--fmtcnt >= 0) {
702 c = *fmt++;
703 if (!isdigit(c))
704 break;
705 if ((prec*10) / 10 != prec) {
706 err_setstr(ValueError,
707 "prec too big");
708 goto error;
709 }
710 prec = prec*10 + (c - '0');
711 }
712 }
713 } /* prec */
714 if (fmtcnt >= 0) {
715 if (c == 'h' || c == 'l' || c == 'L') {
716 size = c;
717 if (--fmtcnt >= 0)
718 c = *fmt++;
719 }
720 }
721 if (fmtcnt < 0) {
722 err_setstr(ValueError, "incomplete format");
723 goto error;
724 }
725 if (c != '%') {
726 v = getnextarg(args, arglen, &argidx);
727 if (v == NULL)
728 goto error;
729 }
730 sign = 0;
731 fill = ' ';
732 switch (c) {
733 case '%':
734 buf = "%";
735 len = 1;
736 break;
737 case 's':
738 if (!is_stringobject(v)) {
739 err_setstr(TypeError,
740 "%s wants string");
741 goto error;
742 }
743 buf = getstringvalue(v);
744 len = getstringsize(v);
745 if (prec >= 0 && len > prec)
746 len = prec;
747 break;
748 case 'i':
749 case 'd':
750 case 'u':
751 case 'o':
752 case 'x':
753 case 'X':
754 if (c == 'i')
755 c = 'd';
756 buf = formatint(flags, prec, c, v);
757 if (buf == NULL)
758 goto error;
759 len = strlen(buf);
760 sign = (c == 'd');
761 if (flags&F_ZERO)
762 fill = '0';
763 break;
764 case 'e':
765 case 'E':
766 case 'f':
767 case 'g':
768 case 'G':
769 buf = formatfloat(flags, prec, c, v);
770 if (buf == NULL)
771 goto error;
772 len = strlen(buf);
773 sign = 1;
774 if (flags&F_ZERO)
775 fill = '0';
776 break;
777 case 'c':
778 buf = formatchar(v);
779 if (buf == NULL)
780 goto error;
781 len = strlen(buf);
782 break;
783 default:
784 err_setstr(ValueError,
785 "unsupported format character");
786 goto error;
787 }
Guido van Rossum234f9421993-06-17 12:35:49 +0000788 /* XXX There's a bug somewhere here so that
789 XXX '%4d'%-1 yields '- 1' ... */
Guido van Rossume5372401993-03-16 12:15:04 +0000790 if (sign) {
791 if (*buf == '-' || *buf == '+') {
792 sign = *buf++;
793 len--;
794 }
795 else if (flags & F_SIGN)
796 sign = '+';
797 else if (flags & F_BLANK)
798 sign = ' ';
799 else
800 sign = '\0';
801 }
802 if (width < len)
803 width = len;
804 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000805 reslen -= rescnt;
806 rescnt = width + fmtcnt + 100;
807 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000808 if (resizestring(&result, reslen) < 0)
809 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000810 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000811 }
812 if (sign) {
813 *res++ = sign;
814 rescnt--;
815 if (width > len)
816 width--;
817 }
818 if (width > len && !(flags&F_LJUST)) {
819 do {
820 --rescnt;
821 *res++ = fill;
822 } while (--width > len);
823 }
824 memcpy(res, buf, len);
825 res += len;
826 rescnt -= len;
827 while (--width >= len) {
828 --rescnt;
829 *res++ = ' ';
830 }
831 } /* '%' */
832 } /* until end */
833 if (argidx < arglen) {
834 err_setstr(TypeError, "not all arguments converted");
835 goto error;
836 }
837 resizestring(&result, reslen - rescnt);
838 return result;
839 error:
840 DECREF(result);
841 return NULL;
842}