blob: 7df894e12c76f517e115ad7c3ba9066d3130d534 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000025/* String object implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
Guido van Rossum013142a1994-08-30 08:19:36 +000029#include <ctype.h>
30
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000031#ifdef COUNT_ALLOCS
32int null_strings, one_strings;
33#endif
34
Guido van Rossum03093a21994-09-28 15:51:32 +000035#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000036#include <limits.h>
37#else
38#ifndef UCHAR_MAX
39#define UCHAR_MAX 255
40#endif
41#endif
42
43static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000046#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000063object *
64newsizedstringobject(str, size)
65 char *str;
66 int size;
67{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000069#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 if (size == 0 && (op = nullstring) != NULL) {
71#ifdef COUNT_ALLOCS
72 null_strings++;
73#endif
74 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000075 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000076 }
77 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
78#ifdef COUNT_ALLOCS
79 one_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000084#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (op == NULL)
88 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000089 op->ob_type = &Stringtype;
90 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091#ifdef CACHE_HASH
92 op->ob_shash = -1;
93#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +000094 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000095 if (str != NULL)
96 memcpy(op->ob_sval, str, size);
97 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000099 if (size == 0) {
100 nullstring = op;
101 INCREF(op);
102 } else if (size == 1 && str != NULL) {
103 characters[*str & UCHAR_MAX] = op;
104 INCREF(op);
105 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000106#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000107 return (object *) op;
108}
109
110object *
111newstringobject(str)
112 char *str;
113{
114 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000116#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
121 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000122 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
128 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000129 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000131#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000133 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000134 if (op == NULL)
135 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000136 op->ob_type = &Stringtype;
137 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138#ifdef CACHE_HASH
139 op->ob_shash = -1;
140#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000141 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000142 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000143#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
145 nullstring = op;
146 INCREF(op);
147 } else if (size == 1) {
148 characters[*str & UCHAR_MAX] = op;
149 INCREF(op);
150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152 return (object *) op;
153}
154
Guido van Rossum234f9421993-06-17 12:35:49 +0000155static void
Guido van Rossume5372401993-03-16 12:15:04 +0000156string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000157 object *op;
158{
159 DEL(op);
160}
161
Guido van Rossumd7047b31995-01-02 19:07:15 +0000162int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000163getstringsize(op)
164 register object *op;
165{
166 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000167 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000168 return -1;
169 }
170 return ((stringobject *)op) -> ob_size;
171}
172
173/*const*/ char *
174getstringvalue(op)
175 register object *op;
176{
177 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000178 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 return NULL;
180 }
181 return ((stringobject *)op) -> ob_sval;
182}
183
184/* Methods */
185
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000186static int
Guido van Rossume5372401993-03-16 12:15:04 +0000187string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000188 stringobject *op;
189 FILE *fp;
190 int flags;
191{
192 int i;
193 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000194 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000195 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000196 if (flags & PRINT_RAW) {
197 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000198 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000199 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200
201 /* figure out which quote to use; single is prefered */
202 quote = '\'';
203 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
204 quote = '"';
205
206 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000207 for (i = 0; i < op->ob_size; i++) {
208 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000209 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 fprintf(fp, "\\%c", c);
211 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000212 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000214 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000217 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218}
219
220static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000221string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222 register stringobject *op;
223{
224 /* XXX overflow? */
225 int newsize = 2 + 4 * op->ob_size * sizeof(char);
226 object *v = newsizedstringobject((char *)NULL, newsize);
227 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000228 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 }
230 else {
231 register int i;
232 register char c;
233 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000234 int quote;
235
236 /* figure out which quote to use; single is prefered */
237 quote = '\'';
238 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
239 quote = '"';
240
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000241 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000242 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 for (i = 0; i < op->ob_size; i++) {
244 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000245 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000246 *p++ = '\\', *p++ = c;
247 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000248 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000249 while (*p != '\0')
250 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000251 }
252 else
253 *p++ = c;
254 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 *p = '\0';
257 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000258 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262static int
Guido van Rossume5372401993-03-16 12:15:04 +0000263string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 stringobject *a;
265{
266 return a->ob_size;
267}
268
269static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000270string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271 register stringobject *a;
272 register object *bb;
273{
274 register unsigned int size;
275 register stringobject *op;
276 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000277 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278 return NULL;
279 }
280#define b ((stringobject *)bb)
281 /* Optimize cases with empty left or right operand */
282 if (a->ob_size == 0) {
283 INCREF(bb);
284 return bb;
285 }
286 if (b->ob_size == 0) {
287 INCREF(a);
288 return (object *)a;
289 }
290 size = a->ob_size + b->ob_size;
291 op = (stringobject *)
292 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000293 if (op == NULL)
294 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000295 op->ob_type = &Stringtype;
296 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000297#ifdef CACHE_HASH
298 op->ob_shash = -1;
299#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000300 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000301 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
302 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
303 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304 return (object *) op;
305#undef b
306}
307
308static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000309string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 register stringobject *a;
311 register int n;
312{
313 register int i;
314 register unsigned int size;
315 register stringobject *op;
316 if (n < 0)
317 n = 0;
318 size = a->ob_size * n;
319 if (size == a->ob_size) {
320 INCREF(a);
321 return (object *)a;
322 }
323 op = (stringobject *)
324 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000325 if (op == NULL)
326 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000327 op->ob_type = &Stringtype;
328 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000329#ifdef CACHE_HASH
330 op->ob_shash = -1;
331#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000332 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000333 for (i = 0; i < size; i += a->ob_size)
334 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
335 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 return (object *) op;
337}
338
339/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
340
341static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000342string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 register stringobject *a;
344 register int i, j; /* May be negative! */
345{
346 if (i < 0)
347 i = 0;
348 if (j < 0)
349 j = 0; /* Avoid signed/unsigned bug in next line */
350 if (j > a->ob_size)
351 j = a->ob_size;
352 if (i == 0 && j == a->ob_size) { /* It's the same as a */
353 INCREF(a);
354 return (object *)a;
355 }
356 if (j < i)
357 j = i;
358 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
359}
360
361static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000362string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 stringobject *a;
364 register int i;
365{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000366 int c;
367 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000369 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 return NULL;
371 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000372 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000373 v = (object *) characters[c];
374#ifdef COUNT_ALLOCS
375 if (v != NULL)
376 one_strings++;
377#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000378 if (v == NULL) {
379 v = newsizedstringobject((char *)NULL, 1);
380 if (v == NULL)
381 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000382 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000383 ((stringobject *)v)->ob_sval[0] = c;
384 }
385 INCREF(v);
386 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387}
388
389static int
Guido van Rossume5372401993-03-16 12:15:04 +0000390string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 stringobject *a, *b;
392{
Guido van Rossum253919f1991-02-13 23:18:39 +0000393 int len_a = a->ob_size, len_b = b->ob_size;
394 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000395 int cmp;
396 if (min_len > 0) {
397 cmp = *a->ob_sval - *b->ob_sval;
398 if (cmp == 0)
399 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
400 if (cmp != 0)
401 return cmp;
402 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000403 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404}
405
Guido van Rossum9bfef441993-03-29 10:43:31 +0000406static long
407string_hash(a)
408 stringobject *a;
409{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000410 register int len;
411 register unsigned char *p;
412 register long x;
413
414#ifdef CACHE_HASH
415 if (a->ob_shash != -1)
416 return a->ob_shash;
417#endif
418 len = a->ob_size;
419 p = (unsigned char *) a->ob_sval;
420 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000421 while (--len >= 0)
422 x = (x + x + x) ^ *p++;
423 x ^= a->ob_size;
424 if (x == -1)
425 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000426#ifdef CACHE_HASH
427 a->ob_shash = x;
428#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000429 return x;
430}
431
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000432static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000433 (inquiry)string_length, /*sq_length*/
434 (binaryfunc)string_concat, /*sq_concat*/
435 (intargfunc)string_repeat, /*sq_repeat*/
436 (intargfunc)string_item, /*sq_item*/
437 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000438 0, /*sq_ass_item*/
439 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440};
441
442typeobject Stringtype = {
443 OB_HEAD_INIT(&Typetype)
444 0,
445 "string",
446 sizeof(stringobject),
447 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000448 (destructor)string_dealloc, /*tp_dealloc*/
449 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450 0, /*tp_getattr*/
451 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000452 (cmpfunc)string_compare, /*tp_compare*/
453 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 0, /*tp_as_number*/
455 &string_as_sequence, /*tp_as_sequence*/
456 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000457 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458};
459
460void
461joinstring(pv, w)
462 register object **pv;
463 register object *w;
464{
465 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000466 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000468 if (w == NULL || !is_stringobject(*pv)) {
469 DECREF(*pv);
470 *pv = NULL;
471 return;
472 }
Guido van Rossume5372401993-03-16 12:15:04 +0000473 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474 DECREF(*pv);
475 *pv = v;
476}
477
Guido van Rossum013142a1994-08-30 08:19:36 +0000478void
479joinstring_decref(pv, w)
480 register object **pv;
481 register object *w;
482{
483 joinstring(pv, w);
484 XDECREF(w);
485}
486
487
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000488/* The following function breaks the notion that strings are immutable:
489 it changes the size of a string. We get away with this only if there
490 is only one module referencing the object. You can also think of it
491 as creating a new string object and destroying the old one, only
492 more efficiently. In any case, don't use this if the string may
493 already be known to some other part of the code... */
494
495int
496resizestring(pv, newsize)
497 object **pv;
498 int newsize;
499{
Guido van Rossum921842f1990-11-18 17:30:23 +0000500 register object *v;
501 register stringobject *sv;
502 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503 if (!is_stringobject(v) || v->ob_refcnt != 1) {
504 *pv = 0;
505 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000506 err_badcall();
507 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000509 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum392ab321990-11-18 17:41:19 +0000510#ifdef REF_DEBUG
Guido van Rossum921842f1990-11-18 17:30:23 +0000511 --ref_total;
512#endif
513 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514 *pv = (object *)
515 realloc((char *)v,
516 sizeof(stringobject) + newsize * sizeof(char));
517 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000518 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000519 err_nomem();
520 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000522 NEWREF(*pv);
523 sv = (stringobject *) *pv;
524 sv->ob_size = newsize;
525 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526 return 0;
527}
Guido van Rossume5372401993-03-16 12:15:04 +0000528
529/* Helpers for formatstring */
530
531static object *
532getnextarg(args, arglen, p_argidx)
533 object *args;
534 int arglen;
535 int *p_argidx;
536{
537 int argidx = *p_argidx;
538 if (argidx < arglen) {
539 (*p_argidx)++;
540 if (arglen < 0)
541 return args;
542 else
543 return gettupleitem(args, argidx);
544 }
545 err_setstr(TypeError, "not enough arguments for format string");
546 return NULL;
547}
548
549#define F_LJUST (1<<0)
550#define F_SIGN (1<<1)
551#define F_BLANK (1<<2)
552#define F_ALT (1<<3)
553#define F_ZERO (1<<4)
554
555extern double fabs PROTO((double));
556
557static char *
558formatfloat(flags, prec, type, v)
559 int flags;
560 int prec;
561 int type;
562 object *v;
563{
564 char fmt[20];
565 static char buf[120];
566 double x;
567 if (!getargs(v, "d;float argument required", &x))
568 return NULL;
569 if (prec < 0)
570 prec = 6;
571 if (prec > 50)
572 prec = 50; /* Arbitrary limitation */
573 if (type == 'f' && fabs(x)/1e25 >= 1e25)
574 type = 'g';
575 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
576 sprintf(buf, fmt, x);
577 return buf;
578}
579
580static char *
581formatint(flags, prec, type, v)
582 int flags;
583 int prec;
584 int type;
585 object *v;
586{
587 char fmt[20];
588 static char buf[50];
589 long x;
590 if (!getargs(v, "l;int argument required", &x))
591 return NULL;
592 if (prec < 0)
593 prec = 1;
594 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
595 sprintf(buf, fmt, x);
596 return buf;
597}
598
599static char *
600formatchar(v)
601 object *v;
602{
603 static char buf[2];
604 if (is_stringobject(v)) {
605 if (!getargs(v, "c;%c requires int or char", &buf[0]))
606 return NULL;
607 }
608 else {
609 if (!getargs(v, "b;%c requires int or char", &buf[0]))
610 return NULL;
611 }
612 buf[1] = '\0';
613 return buf;
614}
615
Guido van Rossum013142a1994-08-30 08:19:36 +0000616/* XXX this could be moved to object.c */
617static object *
618get_mapping_item(mo, ko)
619 object *mo;
620 object *ko;
621{
622 mapping_methods *mm = mo->ob_type->tp_as_mapping;
623 object *val;
624
625 if (!mm || !mm->mp_subscript) {
626 err_setstr(TypeError, "subscript not implemented");
627 return NULL;
628 }
629
630 val = (*mm->mp_subscript)(mo, ko);
631 XDECREF(val); /* still in mapping */
632
633 return val;
634}
635
636
Guido van Rossume5372401993-03-16 12:15:04 +0000637/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
638
639object *
640formatstring(format, args)
641 object *format;
642 object *args;
643{
644 char *fmt, *res;
645 int fmtcnt, rescnt, reslen, arglen, argidx;
646 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000647 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000648 if (format == NULL || !is_stringobject(format) || args == NULL) {
649 err_badcall();
650 return NULL;
651 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000652 fmt = getstringvalue(format);
653 fmtcnt = getstringsize(format);
654 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000655 result = newsizedstringobject((char *)NULL, reslen);
656 if (result == NULL)
657 return NULL;
658 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000659 if (is_tupleobject(args)) {
660 arglen = gettuplesize(args);
661 argidx = 0;
662 }
663 else {
664 arglen = -1;
665 argidx = -2;
666 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000667 if (args->ob_type->tp_as_mapping)
668 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000669 while (--fmtcnt >= 0) {
670 if (*fmt != '%') {
671 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000672 rescnt = fmtcnt + 100;
673 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000674 if (resizestring(&result, reslen) < 0)
675 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000676 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000677 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000678 }
679 *res++ = *fmt++;
680 }
681 else {
682 /* Got a format specifier */
683 int flags = 0;
684 char *fmtstart = fmt++;
685 int width = -1;
686 int prec = -1;
687 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000688 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000689 int fill;
690 object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000691 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000692 char *buf;
693 int sign;
694 int len;
Guido van Rossum013142a1994-08-30 08:19:36 +0000695 if (*fmt == '(') {
696 char *keystart;
697 int keylen;
698 object *key;
699
700 if (dict == NULL) {
701 err_setstr(TypeError,
702 "format requires a mapping");
703 goto error;
704 }
705 ++fmt;
706 --fmtcnt;
707 keystart = fmt;
708 while (--fmtcnt >= 0 && *fmt != ')')
709 fmt++;
710 keylen = fmt - keystart;
711 ++fmt;
712 if (fmtcnt < 0) {
713 err_setstr(ValueError,
714 "incomplete format key");
715 goto error;
716 }
717 key = newsizedstringobject(keystart, keylen);
718 if (key == NULL)
719 goto error;
720 args = get_mapping_item(dict, key);
721 DECREF(key);
722 if (args == NULL) {
723 goto error;
724 }
725 arglen = -1;
726 argidx = -2;
727 }
Guido van Rossume5372401993-03-16 12:15:04 +0000728 while (--fmtcnt >= 0) {
729 switch (c = *fmt++) {
730 case '-': flags |= F_LJUST; continue;
731 case '+': flags |= F_SIGN; continue;
732 case ' ': flags |= F_BLANK; continue;
733 case '#': flags |= F_ALT; continue;
734 case '0': flags |= F_ZERO; continue;
735 }
736 break;
737 }
738 if (c == '*') {
739 v = getnextarg(args, arglen, &argidx);
740 if (v == NULL)
741 goto error;
742 if (!is_intobject(v)) {
743 err_setstr(TypeError, "* wants int");
744 goto error;
745 }
746 width = getintvalue(v);
747 if (width < 0)
748 width = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000752 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000753 width = c - '0';
754 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000755 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000756 if (!isdigit(c))
757 break;
758 if ((width*10) / 10 != width) {
759 err_setstr(ValueError,
760 "width too big");
761 goto error;
762 }
763 width = width*10 + (c - '0');
764 }
765 }
766 if (c == '.') {
767 prec = 0;
768 if (--fmtcnt >= 0)
769 c = *fmt++;
770 if (c == '*') {
771 v = getnextarg(args, arglen, &argidx);
772 if (v == NULL)
773 goto error;
774 if (!is_intobject(v)) {
775 err_setstr(TypeError,
776 "* wants int");
777 goto error;
778 }
779 prec = getintvalue(v);
780 if (prec < 0)
781 prec = 0;
782 if (--fmtcnt >= 0)
783 c = *fmt++;
784 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000785 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000786 prec = c - '0';
787 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000788 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000789 if (!isdigit(c))
790 break;
791 if ((prec*10) / 10 != prec) {
792 err_setstr(ValueError,
793 "prec too big");
794 goto error;
795 }
796 prec = prec*10 + (c - '0');
797 }
798 }
799 } /* prec */
800 if (fmtcnt >= 0) {
801 if (c == 'h' || c == 'l' || c == 'L') {
802 size = c;
803 if (--fmtcnt >= 0)
804 c = *fmt++;
805 }
806 }
807 if (fmtcnt < 0) {
808 err_setstr(ValueError, "incomplete format");
809 goto error;
810 }
811 if (c != '%') {
812 v = getnextarg(args, arglen, &argidx);
813 if (v == NULL)
814 goto error;
815 }
816 sign = 0;
817 fill = ' ';
818 switch (c) {
819 case '%':
820 buf = "%";
821 len = 1;
822 break;
823 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000824 temp = strobject(v);
825 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000826 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000827 buf = getstringvalue(temp);
828 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000829 if (prec >= 0 && len > prec)
830 len = prec;
831 break;
832 case 'i':
833 case 'd':
834 case 'u':
835 case 'o':
836 case 'x':
837 case 'X':
838 if (c == 'i')
839 c = 'd';
840 buf = formatint(flags, prec, c, v);
841 if (buf == NULL)
842 goto error;
843 len = strlen(buf);
844 sign = (c == 'd');
845 if (flags&F_ZERO)
846 fill = '0';
847 break;
848 case 'e':
849 case 'E':
850 case 'f':
851 case 'g':
852 case 'G':
853 buf = formatfloat(flags, prec, c, v);
854 if (buf == NULL)
855 goto error;
856 len = strlen(buf);
857 sign = 1;
858 if (flags&F_ZERO)
859 fill = '0';
860 break;
861 case 'c':
862 buf = formatchar(v);
863 if (buf == NULL)
864 goto error;
Guido van Rossum6938a291993-11-11 14:51:57 +0000865 len = 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000866 break;
867 default:
868 err_setstr(ValueError,
869 "unsupported format character");
870 goto error;
871 }
872 if (sign) {
873 if (*buf == '-' || *buf == '+') {
874 sign = *buf++;
875 len--;
876 }
877 else if (flags & F_SIGN)
878 sign = '+';
879 else if (flags & F_BLANK)
880 sign = ' ';
881 else
882 sign = '\0';
883 }
884 if (width < len)
885 width = len;
886 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000887 reslen -= rescnt;
888 rescnt = width + fmtcnt + 100;
889 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000890 if (resizestring(&result, reslen) < 0)
891 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000892 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000893 }
894 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000895 if (fill != ' ')
896 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000897 rescnt--;
898 if (width > len)
899 width--;
900 }
901 if (width > len && !(flags&F_LJUST)) {
902 do {
903 --rescnt;
904 *res++ = fill;
905 } while (--width > len);
906 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000907 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000908 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000909 memcpy(res, buf, len);
910 res += len;
911 rescnt -= len;
912 while (--width >= len) {
913 --rescnt;
914 *res++ = ' ';
915 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000916 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000917 err_setstr(TypeError,
918 "not all arguments converted");
919 goto error;
920 }
921 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000922 } /* '%' */
923 } /* until end */
924 if (argidx < arglen) {
925 err_setstr(TypeError, "not all arguments converted");
926 goto error;
927 }
928 resizestring(&result, reslen - rescnt);
929 return result;
930 error:
931 DECREF(result);
932 return NULL;
933}