blob: b880891d9aa59a7273926984df2b5bd285c19201 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum013142a1994-08-30 08:19:36 +000036#include <ctype.h>
37
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000038#ifdef COUNT_ALLOCS
39int null_strings, one_strings;
40#endif
41
Guido van Rossum03093a21994-09-28 15:51:32 +000042#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043#include <limits.h>
44#else
45#ifndef UCHAR_MAX
46#define UCHAR_MAX 255
47#endif
48#endif
49
50static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000051#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054
/*
   newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   newstringobject() this is always the case; for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070object *
71newsizedstringobject(str, size)
72 char *str;
73 int size;
74{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000076#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0 && (op = nullstring) != NULL) {
78#ifdef COUNT_ALLOCS
79 null_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
84 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
85#ifdef COUNT_ALLOCS
86 one_strings++;
87#endif
88 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000089 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000091#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000094 if (op == NULL)
95 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000096 op->ob_type = &Stringtype;
97 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098#ifdef CACHE_HASH
99 op->ob_shash = -1;
100#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000101 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000102 if (str != NULL)
103 memcpy(op->ob_sval, str, size);
104 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000105#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0) {
107 nullstring = op;
108 INCREF(op);
109 } else if (size == 1 && str != NULL) {
110 characters[*str & UCHAR_MAX] = op;
111 INCREF(op);
112 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000113#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000114 return (object *) op;
115}
116
117object *
118newstringobject(str)
119 char *str;
120{
121 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000122 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000123#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 if (size == 0 && (op = nullstring) != NULL) {
125#ifdef COUNT_ALLOCS
126 null_strings++;
127#endif
128 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000129 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
131 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
132#ifdef COUNT_ALLOCS
133 one_strings++;
134#endif
135 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000136 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000138#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000141 if (op == NULL)
142 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000143 op->ob_type = &Stringtype;
144 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145#ifdef CACHE_HASH
146 op->ob_shash = -1;
147#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000148 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000150#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 if (size == 0) {
152 nullstring = op;
153 INCREF(op);
154 } else if (size == 1) {
155 characters[*str & UCHAR_MAX] = op;
156 INCREF(op);
157 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000158#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159 return (object *) op;
160}
161
Guido van Rossum234f9421993-06-17 12:35:49 +0000162static void
Guido van Rossume5372401993-03-16 12:15:04 +0000163string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000164 object *op;
165{
166 DEL(op);
167}
168
Guido van Rossumd7047b31995-01-02 19:07:15 +0000169int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000170getstringsize(op)
171 register object *op;
172{
173 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000174 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000175 return -1;
176 }
177 return ((stringobject *)op) -> ob_size;
178}
179
180/*const*/ char *
181getstringvalue(op)
182 register object *op;
183{
184 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000185 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000186 return NULL;
187 }
188 return ((stringobject *)op) -> ob_sval;
189}
190
191/* Methods */
192
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000193static int
Guido van Rossume5372401993-03-16 12:15:04 +0000194string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 stringobject *op;
196 FILE *fp;
197 int flags;
198{
199 int i;
200 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000201 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203 if (flags & PRINT_RAW) {
204 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000205 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000206 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000207
208 /* figure out which quote to use; single is prefered */
209 quote = '\'';
210 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
211 quote = '"';
212
213 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000214 for (i = 0; i < op->ob_size; i++) {
215 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000217 fprintf(fp, "\\%c", c);
218 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000219 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000221 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000222 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000223 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000224 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000225}
226
227static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000228string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 register stringobject *op;
230{
231 /* XXX overflow? */
232 int newsize = 2 + 4 * op->ob_size * sizeof(char);
233 object *v = newsizedstringobject((char *)NULL, newsize);
234 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000235 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000236 }
237 else {
238 register int i;
239 register char c;
240 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000241 int quote;
242
243 /* figure out which quote to use; single is prefered */
244 quote = '\'';
245 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
246 quote = '"';
247
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000248 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000249 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000250 for (i = 0; i < op->ob_size; i++) {
251 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000252 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253 *p++ = '\\', *p++ = c;
254 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 while (*p != '\0')
257 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000258 }
259 else
260 *p++ = c;
261 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000262 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000263 *p = '\0';
264 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000265 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267}
268
269static int
Guido van Rossume5372401993-03-16 12:15:04 +0000270string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271 stringobject *a;
272{
273 return a->ob_size;
274}
275
276static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000277string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278 register stringobject *a;
279 register object *bb;
280{
281 register unsigned int size;
282 register stringobject *op;
283 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000284 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000285 return NULL;
286 }
287#define b ((stringobject *)bb)
288 /* Optimize cases with empty left or right operand */
289 if (a->ob_size == 0) {
290 INCREF(bb);
291 return bb;
292 }
293 if (b->ob_size == 0) {
294 INCREF(a);
295 return (object *)a;
296 }
297 size = a->ob_size + b->ob_size;
298 op = (stringobject *)
299 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000300 if (op == NULL)
301 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000302 op->ob_type = &Stringtype;
303 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000304#ifdef CACHE_HASH
305 op->ob_shash = -1;
306#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000307 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000308 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
309 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
310 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000311 return (object *) op;
312#undef b
313}
314
315static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000316string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 register stringobject *a;
318 register int n;
319{
320 register int i;
321 register unsigned int size;
322 register stringobject *op;
323 if (n < 0)
324 n = 0;
325 size = a->ob_size * n;
326 if (size == a->ob_size) {
327 INCREF(a);
328 return (object *)a;
329 }
330 op = (stringobject *)
331 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000332 if (op == NULL)
333 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000334 op->ob_type = &Stringtype;
335 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000336#ifdef CACHE_HASH
337 op->ob_shash = -1;
338#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000339 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000340 for (i = 0; i < size; i += a->ob_size)
341 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
342 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 return (object *) op;
344}
345
346/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
347
348static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000349string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350 register stringobject *a;
351 register int i, j; /* May be negative! */
352{
353 if (i < 0)
354 i = 0;
355 if (j < 0)
356 j = 0; /* Avoid signed/unsigned bug in next line */
357 if (j > a->ob_size)
358 j = a->ob_size;
359 if (i == 0 && j == a->ob_size) { /* It's the same as a */
360 INCREF(a);
361 return (object *)a;
362 }
363 if (j < i)
364 j = i;
365 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
366}
367
368static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000369string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 stringobject *a;
371 register int i;
372{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000373 int c;
374 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000375 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000376 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000377 return NULL;
378 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000379 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000380 v = (object *) characters[c];
381#ifdef COUNT_ALLOCS
382 if (v != NULL)
383 one_strings++;
384#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000385 if (v == NULL) {
386 v = newsizedstringobject((char *)NULL, 1);
387 if (v == NULL)
388 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000389 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000390 ((stringobject *)v)->ob_sval[0] = c;
391 }
392 INCREF(v);
393 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394}
395
396static int
Guido van Rossume5372401993-03-16 12:15:04 +0000397string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000398 stringobject *a, *b;
399{
Guido van Rossum253919f1991-02-13 23:18:39 +0000400 int len_a = a->ob_size, len_b = b->ob_size;
401 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000402 int cmp;
403 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000404 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000405 if (cmp == 0)
406 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
407 if (cmp != 0)
408 return cmp;
409 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000410 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000411}
412
Guido van Rossum9bfef441993-03-29 10:43:31 +0000413static long
414string_hash(a)
415 stringobject *a;
416{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000417 register int len;
418 register unsigned char *p;
419 register long x;
420
421#ifdef CACHE_HASH
422 if (a->ob_shash != -1)
423 return a->ob_shash;
424#endif
425 len = a->ob_size;
426 p = (unsigned char *) a->ob_sval;
427 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000428 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000429 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000430 x ^= a->ob_size;
431 if (x == -1)
432 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000433#ifdef CACHE_HASH
434 a->ob_shash = x;
435#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000436 return x;
437}
438
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000440 (inquiry)string_length, /*sq_length*/
441 (binaryfunc)string_concat, /*sq_concat*/
442 (intargfunc)string_repeat, /*sq_repeat*/
443 (intargfunc)string_item, /*sq_item*/
444 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000445 0, /*sq_ass_item*/
446 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000447};
448
449typeobject Stringtype = {
450 OB_HEAD_INIT(&Typetype)
451 0,
452 "string",
453 sizeof(stringobject),
454 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000455 (destructor)string_dealloc, /*tp_dealloc*/
456 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000457 0, /*tp_getattr*/
458 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000459 (cmpfunc)string_compare, /*tp_compare*/
460 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000461 0, /*tp_as_number*/
462 &string_as_sequence, /*tp_as_sequence*/
463 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000464 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000465};
466
467void
468joinstring(pv, w)
469 register object **pv;
470 register object *w;
471{
472 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000473 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000475 if (w == NULL || !is_stringobject(*pv)) {
476 DECREF(*pv);
477 *pv = NULL;
478 return;
479 }
Guido van Rossume5372401993-03-16 12:15:04 +0000480 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 DECREF(*pv);
482 *pv = v;
483}
484
Guido van Rossum013142a1994-08-30 08:19:36 +0000485void
486joinstring_decref(pv, w)
487 register object **pv;
488 register object *w;
489{
490 joinstring(pv, w);
491 XDECREF(w);
492}
493
494
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000495/* The following function breaks the notion that strings are immutable:
496 it changes the size of a string. We get away with this only if there
497 is only one module referencing the object. You can also think of it
498 as creating a new string object and destroying the old one, only
499 more efficiently. In any case, don't use this if the string may
500 already be known to some other part of the code... */
501
502int
503resizestring(pv, newsize)
504 object **pv;
505 int newsize;
506{
Guido van Rossum921842f1990-11-18 17:30:23 +0000507 register object *v;
508 register stringobject *sv;
509 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510 if (!is_stringobject(v) || v->ob_refcnt != 1) {
511 *pv = 0;
512 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000513 err_badcall();
514 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000515 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000516 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +0000517#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +0000518 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +0000519#endif
520 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 *pv = (object *)
522 realloc((char *)v,
523 sizeof(stringobject) + newsize * sizeof(char));
524 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000525 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000526 err_nomem();
527 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000528 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000529 NEWREF(*pv);
530 sv = (stringobject *) *pv;
531 sv->ob_size = newsize;
532 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533 return 0;
534}
Guido van Rossume5372401993-03-16 12:15:04 +0000535
536/* Helpers for formatstring */
537
538static object *
539getnextarg(args, arglen, p_argidx)
540 object *args;
541 int arglen;
542 int *p_argidx;
543{
544 int argidx = *p_argidx;
545 if (argidx < arglen) {
546 (*p_argidx)++;
547 if (arglen < 0)
548 return args;
549 else
550 return gettupleitem(args, argidx);
551 }
552 err_setstr(TypeError, "not enough arguments for format string");
553 return NULL;
554}
555
/* Bit flags collected from the flag characters of a format specifier */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
561
562extern double fabs PROTO((double));
563
564static char *
565formatfloat(flags, prec, type, v)
566 int flags;
567 int prec;
568 int type;
569 object *v;
570{
571 char fmt[20];
572 static char buf[120];
573 double x;
574 if (!getargs(v, "d;float argument required", &x))
575 return NULL;
576 if (prec < 0)
577 prec = 6;
578 if (prec > 50)
579 prec = 50; /* Arbitrary limitation */
580 if (type == 'f' && fabs(x)/1e25 >= 1e25)
581 type = 'g';
582 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
583 sprintf(buf, fmt, x);
584 return buf;
585}
586
587static char *
588formatint(flags, prec, type, v)
589 int flags;
590 int prec;
591 int type;
592 object *v;
593{
594 char fmt[20];
595 static char buf[50];
596 long x;
597 if (!getargs(v, "l;int argument required", &x))
598 return NULL;
599 if (prec < 0)
600 prec = 1;
601 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
602 sprintf(buf, fmt, x);
603 return buf;
604}
605
606static char *
607formatchar(v)
608 object *v;
609{
610 static char buf[2];
611 if (is_stringobject(v)) {
612 if (!getargs(v, "c;%c requires int or char", &buf[0]))
613 return NULL;
614 }
615 else {
616 if (!getargs(v, "b;%c requires int or char", &buf[0]))
617 return NULL;
618 }
619 buf[1] = '\0';
620 return buf;
621}
622
Guido van Rossum013142a1994-08-30 08:19:36 +0000623
Guido van Rossume5372401993-03-16 12:15:04 +0000624/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
625
626object *
627formatstring(format, args)
628 object *format;
629 object *args;
630{
631 char *fmt, *res;
632 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +0000633 int args_owned = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000634 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000635 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000636 if (format == NULL || !is_stringobject(format) || args == NULL) {
637 err_badcall();
638 return NULL;
639 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000640 fmt = getstringvalue(format);
641 fmtcnt = getstringsize(format);
642 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000643 result = newsizedstringobject((char *)NULL, reslen);
644 if (result == NULL)
645 return NULL;
646 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000647 if (is_tupleobject(args)) {
648 arglen = gettuplesize(args);
649 argidx = 0;
650 }
651 else {
652 arglen = -1;
653 argidx = -2;
654 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000655 if (args->ob_type->tp_as_mapping)
656 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000657 while (--fmtcnt >= 0) {
658 if (*fmt != '%') {
659 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000660 rescnt = fmtcnt + 100;
661 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000662 if (resizestring(&result, reslen) < 0)
663 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000664 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000665 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000666 }
667 *res++ = *fmt++;
668 }
669 else {
670 /* Got a format specifier */
671 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000672 int width = -1;
673 int prec = -1;
674 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000675 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000676 int fill;
Guido van Rossumda9c2711996-12-05 21:58:58 +0000677 object *v = NULL;
Guido van Rossum013142a1994-08-30 08:19:36 +0000678 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000679 char *buf;
680 int sign;
681 int len;
Guido van Rossumda9c2711996-12-05 21:58:58 +0000682 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +0000683 if (*fmt == '(') {
684 char *keystart;
685 int keylen;
686 object *key;
687
688 if (dict == NULL) {
689 err_setstr(TypeError,
690 "format requires a mapping");
691 goto error;
692 }
693 ++fmt;
694 --fmtcnt;
695 keystart = fmt;
696 while (--fmtcnt >= 0 && *fmt != ')')
697 fmt++;
698 keylen = fmt - keystart;
699 ++fmt;
700 if (fmtcnt < 0) {
701 err_setstr(ValueError,
702 "incomplete format key");
703 goto error;
704 }
705 key = newsizedstringobject(keystart, keylen);
706 if (key == NULL)
707 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +0000708 if (args_owned) {
709 DECREF(args);
710 args_owned = 0;
711 }
712 args = PyObject_GetItem(dict, key);
Guido van Rossum013142a1994-08-30 08:19:36 +0000713 DECREF(key);
714 if (args == NULL) {
715 goto error;
716 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000717 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +0000718 arglen = -1;
719 argidx = -2;
720 }
Guido van Rossume5372401993-03-16 12:15:04 +0000721 while (--fmtcnt >= 0) {
722 switch (c = *fmt++) {
723 case '-': flags |= F_LJUST; continue;
724 case '+': flags |= F_SIGN; continue;
725 case ' ': flags |= F_BLANK; continue;
726 case '#': flags |= F_ALT; continue;
727 case '0': flags |= F_ZERO; continue;
728 }
729 break;
730 }
731 if (c == '*') {
732 v = getnextarg(args, arglen, &argidx);
733 if (v == NULL)
734 goto error;
735 if (!is_intobject(v)) {
736 err_setstr(TypeError, "* wants int");
737 goto error;
738 }
739 width = getintvalue(v);
740 if (width < 0)
741 width = 0;
742 if (--fmtcnt >= 0)
743 c = *fmt++;
744 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000745 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000746 width = c - '0';
747 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000748 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000749 if (!isdigit(c))
750 break;
751 if ((width*10) / 10 != width) {
752 err_setstr(ValueError,
753 "width too big");
754 goto error;
755 }
756 width = width*10 + (c - '0');
757 }
758 }
759 if (c == '.') {
760 prec = 0;
761 if (--fmtcnt >= 0)
762 c = *fmt++;
763 if (c == '*') {
764 v = getnextarg(args, arglen, &argidx);
765 if (v == NULL)
766 goto error;
767 if (!is_intobject(v)) {
768 err_setstr(TypeError,
769 "* wants int");
770 goto error;
771 }
772 prec = getintvalue(v);
773 if (prec < 0)
774 prec = 0;
775 if (--fmtcnt >= 0)
776 c = *fmt++;
777 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000778 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000779 prec = c - '0';
780 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000781 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000782 if (!isdigit(c))
783 break;
784 if ((prec*10) / 10 != prec) {
785 err_setstr(ValueError,
786 "prec too big");
787 goto error;
788 }
789 prec = prec*10 + (c - '0');
790 }
791 }
792 } /* prec */
793 if (fmtcnt >= 0) {
794 if (c == 'h' || c == 'l' || c == 'L') {
795 size = c;
796 if (--fmtcnt >= 0)
797 c = *fmt++;
798 }
799 }
800 if (fmtcnt < 0) {
801 err_setstr(ValueError, "incomplete format");
802 goto error;
803 }
804 if (c != '%') {
805 v = getnextarg(args, arglen, &argidx);
806 if (v == NULL)
807 goto error;
808 }
809 sign = 0;
810 fill = ' ';
811 switch (c) {
812 case '%':
813 buf = "%";
814 len = 1;
815 break;
816 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000817 temp = strobject(v);
818 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000819 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000820 buf = getstringvalue(temp);
821 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000822 if (prec >= 0 && len > prec)
823 len = prec;
824 break;
825 case 'i':
826 case 'd':
827 case 'u':
828 case 'o':
829 case 'x':
830 case 'X':
831 if (c == 'i')
832 c = 'd';
833 buf = formatint(flags, prec, c, v);
834 if (buf == NULL)
835 goto error;
836 len = strlen(buf);
837 sign = (c == 'd');
838 if (flags&F_ZERO)
839 fill = '0';
840 break;
841 case 'e':
842 case 'E':
843 case 'f':
844 case 'g':
845 case 'G':
846 buf = formatfloat(flags, prec, c, v);
847 if (buf == NULL)
848 goto error;
849 len = strlen(buf);
850 sign = 1;
851 if (flags&F_ZERO)
852 fill = '0';
853 break;
854 case 'c':
855 buf = formatchar(v);
856 if (buf == NULL)
857 goto error;
Guido van Rossum6938a291993-11-11 14:51:57 +0000858 len = 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000859 break;
860 default:
861 err_setstr(ValueError,
862 "unsupported format character");
863 goto error;
864 }
865 if (sign) {
866 if (*buf == '-' || *buf == '+') {
867 sign = *buf++;
868 len--;
869 }
870 else if (flags & F_SIGN)
871 sign = '+';
872 else if (flags & F_BLANK)
873 sign = ' ';
874 else
875 sign = '\0';
876 }
877 if (width < len)
878 width = len;
879 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000880 reslen -= rescnt;
881 rescnt = width + fmtcnt + 100;
882 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000883 if (resizestring(&result, reslen) < 0)
884 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000885 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000886 }
887 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000888 if (fill != ' ')
889 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000890 rescnt--;
891 if (width > len)
892 width--;
893 }
894 if (width > len && !(flags&F_LJUST)) {
895 do {
896 --rescnt;
897 *res++ = fill;
898 } while (--width > len);
899 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000900 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000901 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000902 memcpy(res, buf, len);
903 res += len;
904 rescnt -= len;
905 while (--width >= len) {
906 --rescnt;
907 *res++ = ' ';
908 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000909 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000910 err_setstr(TypeError,
911 "not all arguments converted");
912 goto error;
913 }
914 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000915 } /* '%' */
916 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +0000917 if (argidx < arglen && !dict) {
Guido van Rossume5372401993-03-16 12:15:04 +0000918 err_setstr(TypeError, "not all arguments converted");
919 goto error;
920 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000921 if (args_owned)
922 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000923 resizestring(&result, reslen - rescnt);
924 return result;
925 error:
926 DECREF(result);
Guido van Rossum993952b1996-05-21 22:44:20 +0000927 if (args_owned)
928 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000929 return NULL;
930}