blob: f037f961771bd24460b38ad651b64f5c642c814d [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum013142a1994-08-30 08:19:36 +000036#include <ctype.h>
37
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000038#ifdef COUNT_ALLOCS
39int null_strings, one_strings;
40#endif
41
Guido van Rossum03093a21994-09-28 15:51:32 +000042#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043#include <limits.h>
44#else
45#ifndef UCHAR_MAX
46#define UCHAR_MAX 255
47#endif
48#endif
49
50static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000051#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070object *
71newsizedstringobject(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000072 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073 int size;
74{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000076#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0 && (op = nullstring) != NULL) {
78#ifdef COUNT_ALLOCS
79 null_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
84 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
85#ifdef COUNT_ALLOCS
86 one_strings++;
87#endif
88 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000089 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000091#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000094 if (op == NULL)
95 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000096 op->ob_type = &Stringtype;
97 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098#ifdef CACHE_HASH
99 op->ob_shash = -1;
100#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000101#ifdef INTERN_STRINGS
102 op->ob_sinterned = NULL;
103#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000104 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000105 if (str != NULL)
106 memcpy(op->ob_sval, str, size);
107 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000108#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 if (size == 0) {
110 nullstring = op;
111 INCREF(op);
112 } else if (size == 1 && str != NULL) {
113 characters[*str & UCHAR_MAX] = op;
114 INCREF(op);
115 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000116#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117 return (object *) op;
118}
119
120object *
121newstringobject(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000122 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123{
124 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0 && (op = nullstring) != NULL) {
128#ifdef COUNT_ALLOCS
129 null_strings++;
130#endif
131 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000132 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135#ifdef COUNT_ALLOCS
136 one_strings++;
137#endif
138 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000139 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000141#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000143 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000144 if (op == NULL)
145 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000146 op->ob_type = &Stringtype;
147 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148#ifdef CACHE_HASH
149 op->ob_shash = -1;
150#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000151#ifdef INTERN_STRINGS
152 op->ob_sinterned = NULL;
153#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000154 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000155 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000156#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000157 if (size == 0) {
158 nullstring = op;
159 INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 INCREF(op);
163 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000164#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000165 return (object *) op;
166}
167
Guido van Rossum234f9421993-06-17 12:35:49 +0000168static void
Guido van Rossume5372401993-03-16 12:15:04 +0000169string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000170 object *op;
171{
172 DEL(op);
173}
174
Guido van Rossumd7047b31995-01-02 19:07:15 +0000175int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176getstringsize(op)
177 register object *op;
178{
179 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000180 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181 return -1;
182 }
183 return ((stringobject *)op) -> ob_size;
184}
185
186/*const*/ char *
187getstringvalue(op)
188 register object *op;
189{
190 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000191 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192 return NULL;
193 }
194 return ((stringobject *)op) -> ob_sval;
195}
196
197/* Methods */
198
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000199static int
Guido van Rossume5372401993-03-16 12:15:04 +0000200string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 stringobject *op;
202 FILE *fp;
203 int flags;
204{
205 int i;
206 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000207 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000208 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000209 if (flags & PRINT_RAW) {
210 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000212 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000213
214 /* figure out which quote to use; single is prefered */
215 quote = '\'';
216 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
217 quote = '"';
218
219 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220 for (i = 0; i < op->ob_size; i++) {
221 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000222 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 fprintf(fp, "\\%c", c);
224 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000227 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000228 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000229 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000230 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231}
232
233static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000234string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000235 register stringobject *op;
236{
237 /* XXX overflow? */
238 int newsize = 2 + 4 * op->ob_size * sizeof(char);
239 object *v = newsizedstringobject((char *)NULL, newsize);
240 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000241 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000242 }
243 else {
244 register int i;
245 register char c;
246 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000247 int quote;
248
249 /* figure out which quote to use; single is prefered */
250 quote = '\'';
251 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
252 quote = '"';
253
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 for (i = 0; i < op->ob_size; i++) {
257 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 *p++ = '\\', *p++ = c;
260 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 while (*p != '\0')
263 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 }
265 else
266 *p++ = c;
267 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000268 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269 *p = '\0';
270 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000271 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000273}
274
275static int
Guido van Rossume5372401993-03-16 12:15:04 +0000276string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277 stringobject *a;
278{
279 return a->ob_size;
280}
281
282static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000283string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 register stringobject *a;
285 register object *bb;
286{
287 register unsigned int size;
288 register stringobject *op;
289 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000290 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291 return NULL;
292 }
293#define b ((stringobject *)bb)
294 /* Optimize cases with empty left or right operand */
295 if (a->ob_size == 0) {
296 INCREF(bb);
297 return bb;
298 }
299 if (b->ob_size == 0) {
300 INCREF(a);
301 return (object *)a;
302 }
303 size = a->ob_size + b->ob_size;
304 op = (stringobject *)
305 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000306 if (op == NULL)
307 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000308 op->ob_type = &Stringtype;
309 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000310#ifdef CACHE_HASH
311 op->ob_shash = -1;
312#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000313#ifdef INTERN_STRINGS
314 op->ob_sinterned = NULL;
315#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000316 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000317 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
318 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
319 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000320 return (object *) op;
321#undef b
322}
323
324static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000325string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 register stringobject *a;
327 register int n;
328{
329 register int i;
330 register unsigned int size;
331 register stringobject *op;
332 if (n < 0)
333 n = 0;
334 size = a->ob_size * n;
335 if (size == a->ob_size) {
336 INCREF(a);
337 return (object *)a;
338 }
339 op = (stringobject *)
340 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000341 if (op == NULL)
342 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000343 op->ob_type = &Stringtype;
344 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000345#ifdef CACHE_HASH
346 op->ob_shash = -1;
347#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000348#ifdef INTERN_STRINGS
349 op->ob_sinterned = NULL;
350#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000351 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000352 for (i = 0; i < size; i += a->ob_size)
353 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
354 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355 return (object *) op;
356}
357
358/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
359
360static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000361string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362 register stringobject *a;
363 register int i, j; /* May be negative! */
364{
365 if (i < 0)
366 i = 0;
367 if (j < 0)
368 j = 0; /* Avoid signed/unsigned bug in next line */
369 if (j > a->ob_size)
370 j = a->ob_size;
371 if (i == 0 && j == a->ob_size) { /* It's the same as a */
372 INCREF(a);
373 return (object *)a;
374 }
375 if (j < i)
376 j = i;
377 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
378}
379
380static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000381string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 stringobject *a;
383 register int i;
384{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000385 int c;
386 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000388 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000389 return NULL;
390 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000391 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000392 v = (object *) characters[c];
393#ifdef COUNT_ALLOCS
394 if (v != NULL)
395 one_strings++;
396#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000397 if (v == NULL) {
398 v = newsizedstringobject((char *)NULL, 1);
399 if (v == NULL)
400 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000401 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000402 ((stringobject *)v)->ob_sval[0] = c;
403 }
404 INCREF(v);
405 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406}
407
408static int
Guido van Rossume5372401993-03-16 12:15:04 +0000409string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 stringobject *a, *b;
411{
Guido van Rossum253919f1991-02-13 23:18:39 +0000412 int len_a = a->ob_size, len_b = b->ob_size;
413 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000414 int cmp;
415 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000416 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000417 if (cmp == 0)
418 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
419 if (cmp != 0)
420 return cmp;
421 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000422 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423}
424
Guido van Rossum9bfef441993-03-29 10:43:31 +0000425static long
426string_hash(a)
427 stringobject *a;
428{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000429 register int len;
430 register unsigned char *p;
431 register long x;
432
433#ifdef CACHE_HASH
434 if (a->ob_shash != -1)
435 return a->ob_shash;
436#endif
437 len = a->ob_size;
438 p = (unsigned char *) a->ob_sval;
439 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000440 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000441 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000442 x ^= a->ob_size;
443 if (x == -1)
444 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000445#ifdef CACHE_HASH
446 a->ob_shash = x;
447#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000448 return x;
449}
450
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000452 (inquiry)string_length, /*sq_length*/
453 (binaryfunc)string_concat, /*sq_concat*/
454 (intargfunc)string_repeat, /*sq_repeat*/
455 (intargfunc)string_item, /*sq_item*/
456 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000457 0, /*sq_ass_item*/
458 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459};
460
461typeobject Stringtype = {
462 OB_HEAD_INIT(&Typetype)
463 0,
464 "string",
465 sizeof(stringobject),
466 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000467 (destructor)string_dealloc, /*tp_dealloc*/
468 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469 0, /*tp_getattr*/
470 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000471 (cmpfunc)string_compare, /*tp_compare*/
472 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 0, /*tp_as_number*/
474 &string_as_sequence, /*tp_as_sequence*/
475 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000476 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +0000477 0, /*tp_call*/
478 0, /*tp_str*/
479 0, /*tp_getattro*/
480 0, /*tp_setattro*/
481 0, /*tp_xxx3*/
482 0, /*tp_xxx4*/
483 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000484};
485
486void
487joinstring(pv, w)
488 register object **pv;
489 register object *w;
490{
491 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000492 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000494 if (w == NULL || !is_stringobject(*pv)) {
495 DECREF(*pv);
496 *pv = NULL;
497 return;
498 }
Guido van Rossume5372401993-03-16 12:15:04 +0000499 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000500 DECREF(*pv);
501 *pv = v;
502}
503
Guido van Rossum013142a1994-08-30 08:19:36 +0000504void
505joinstring_decref(pv, w)
506 register object **pv;
507 register object *w;
508{
509 joinstring(pv, w);
510 XDECREF(w);
511}
512
513
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514/* The following function breaks the notion that strings are immutable:
515 it changes the size of a string. We get away with this only if there
516 is only one module referencing the object. You can also think of it
517 as creating a new string object and destroying the old one, only
518 more efficiently. In any case, don't use this if the string may
519 already be known to some other part of the code... */
520
521int
522resizestring(pv, newsize)
523 object **pv;
524 int newsize;
525{
Guido van Rossum921842f1990-11-18 17:30:23 +0000526 register object *v;
527 register stringobject *sv;
528 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 if (!is_stringobject(v) || v->ob_refcnt != 1) {
530 *pv = 0;
531 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000532 err_badcall();
533 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000535 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +0000536#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +0000537 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +0000538#endif
539 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 *pv = (object *)
541 realloc((char *)v,
542 sizeof(stringobject) + newsize * sizeof(char));
543 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000544 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000545 err_nomem();
546 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000547 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000548 NEWREF(*pv);
549 sv = (stringobject *) *pv;
550 sv->ob_size = newsize;
551 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000552 return 0;
553}
Guido van Rossume5372401993-03-16 12:15:04 +0000554
555/* Helpers for formatstring */
556
557static object *
558getnextarg(args, arglen, p_argidx)
559 object *args;
560 int arglen;
561 int *p_argidx;
562{
563 int argidx = *p_argidx;
564 if (argidx < arglen) {
565 (*p_argidx)++;
566 if (arglen < 0)
567 return args;
568 else
569 return gettupleitem(args, argidx);
570 }
571 err_setstr(TypeError, "not enough arguments for format string");
572 return NULL;
573}
574
575#define F_LJUST (1<<0)
576#define F_SIGN (1<<1)
577#define F_BLANK (1<<2)
578#define F_ALT (1<<3)
579#define F_ZERO (1<<4)
580
581extern double fabs PROTO((double));
582
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000583static int
584formatfloat(buf, flags, prec, type, v)
585 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +0000586 int flags;
587 int prec;
588 int type;
589 object *v;
590{
591 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +0000592 double x;
593 if (!getargs(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000594 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000595 if (prec < 0)
596 prec = 6;
597 if (prec > 50)
598 prec = 50; /* Arbitrary limitation */
599 if (type == 'f' && fabs(x)/1e25 >= 1e25)
600 type = 'g';
601 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
602 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000603 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +0000604}
605
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000606static int
607formatint(buf, flags, prec, type, v)
608 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +0000609 int flags;
610 int prec;
611 int type;
612 object *v;
613{
614 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +0000615 long x;
616 if (!getargs(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000617 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000618 if (prec < 0)
619 prec = 1;
620 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
621 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000622 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +0000623}
624
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000625static int
626formatchar(buf, v)
627 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +0000628 object *v;
629{
Guido van Rossume5372401993-03-16 12:15:04 +0000630 if (is_stringobject(v)) {
631 if (!getargs(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000632 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000633 }
634 else {
635 if (!getargs(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000636 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000637 }
638 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000639 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000640}
641
Guido van Rossum013142a1994-08-30 08:19:36 +0000642
Guido van Rossume5372401993-03-16 12:15:04 +0000643/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
644
645object *
646formatstring(format, args)
647 object *format;
648 object *args;
649{
650 char *fmt, *res;
651 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +0000652 int args_owned = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000653 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000654 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000655 if (format == NULL || !is_stringobject(format) || args == NULL) {
656 err_badcall();
657 return NULL;
658 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000659 fmt = getstringvalue(format);
660 fmtcnt = getstringsize(format);
661 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000662 result = newsizedstringobject((char *)NULL, reslen);
663 if (result == NULL)
664 return NULL;
665 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000666 if (is_tupleobject(args)) {
667 arglen = gettuplesize(args);
668 argidx = 0;
669 }
670 else {
671 arglen = -1;
672 argidx = -2;
673 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000674 if (args->ob_type->tp_as_mapping)
675 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000676 while (--fmtcnt >= 0) {
677 if (*fmt != '%') {
678 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000679 rescnt = fmtcnt + 100;
680 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000681 if (resizestring(&result, reslen) < 0)
682 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000683 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000684 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000685 }
686 *res++ = *fmt++;
687 }
688 else {
689 /* Got a format specifier */
690 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000691 int width = -1;
692 int prec = -1;
693 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000694 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000695 int fill;
Guido van Rossumda9c2711996-12-05 21:58:58 +0000696 object *v = NULL;
Guido van Rossum013142a1994-08-30 08:19:36 +0000697 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000698 char *buf;
699 int sign;
700 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000701 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossumda9c2711996-12-05 21:58:58 +0000702 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +0000703 if (*fmt == '(') {
704 char *keystart;
705 int keylen;
706 object *key;
707
708 if (dict == NULL) {
709 err_setstr(TypeError,
710 "format requires a mapping");
711 goto error;
712 }
713 ++fmt;
714 --fmtcnt;
715 keystart = fmt;
716 while (--fmtcnt >= 0 && *fmt != ')')
717 fmt++;
718 keylen = fmt - keystart;
719 ++fmt;
720 if (fmtcnt < 0) {
721 err_setstr(ValueError,
722 "incomplete format key");
723 goto error;
724 }
725 key = newsizedstringobject(keystart, keylen);
726 if (key == NULL)
727 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +0000728 if (args_owned) {
729 DECREF(args);
730 args_owned = 0;
731 }
732 args = PyObject_GetItem(dict, key);
Guido van Rossum013142a1994-08-30 08:19:36 +0000733 DECREF(key);
734 if (args == NULL) {
735 goto error;
736 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000737 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +0000738 arglen = -1;
739 argidx = -2;
740 }
Guido van Rossume5372401993-03-16 12:15:04 +0000741 while (--fmtcnt >= 0) {
742 switch (c = *fmt++) {
743 case '-': flags |= F_LJUST; continue;
744 case '+': flags |= F_SIGN; continue;
745 case ' ': flags |= F_BLANK; continue;
746 case '#': flags |= F_ALT; continue;
747 case '0': flags |= F_ZERO; continue;
748 }
749 break;
750 }
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!is_intobject(v)) {
756 err_setstr(TypeError, "* wants int");
757 goto error;
758 }
759 width = getintvalue(v);
760 if (width < 0)
761 width = 0;
762 if (--fmtcnt >= 0)
763 c = *fmt++;
764 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000765 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000766 width = c - '0';
767 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000768 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000769 if (!isdigit(c))
770 break;
771 if ((width*10) / 10 != width) {
772 err_setstr(ValueError,
773 "width too big");
774 goto error;
775 }
776 width = width*10 + (c - '0');
777 }
778 }
779 if (c == '.') {
780 prec = 0;
781 if (--fmtcnt >= 0)
782 c = *fmt++;
783 if (c == '*') {
784 v = getnextarg(args, arglen, &argidx);
785 if (v == NULL)
786 goto error;
787 if (!is_intobject(v)) {
788 err_setstr(TypeError,
789 "* wants int");
790 goto error;
791 }
792 prec = getintvalue(v);
793 if (prec < 0)
794 prec = 0;
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000798 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000799 prec = c - '0';
800 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000801 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000802 if (!isdigit(c))
803 break;
804 if ((prec*10) / 10 != prec) {
805 err_setstr(ValueError,
806 "prec too big");
807 goto error;
808 }
809 prec = prec*10 + (c - '0');
810 }
811 }
812 } /* prec */
813 if (fmtcnt >= 0) {
814 if (c == 'h' || c == 'l' || c == 'L') {
815 size = c;
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 }
820 if (fmtcnt < 0) {
821 err_setstr(ValueError, "incomplete format");
822 goto error;
823 }
824 if (c != '%') {
825 v = getnextarg(args, arglen, &argidx);
826 if (v == NULL)
827 goto error;
828 }
829 sign = 0;
830 fill = ' ';
831 switch (c) {
832 case '%':
833 buf = "%";
834 len = 1;
835 break;
836 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000837 temp = strobject(v);
838 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000839 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000840 buf = getstringvalue(temp);
841 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000842 if (prec >= 0 && len > prec)
843 len = prec;
844 break;
845 case 'i':
846 case 'd':
847 case 'u':
848 case 'o':
849 case 'x':
850 case 'X':
851 if (c == 'i')
852 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000853 buf = tmpbuf;
854 len = formatint(buf, flags, prec, c, v);
855 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +0000856 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +0000857 sign = (c == 'd');
858 if (flags&F_ZERO)
859 fill = '0';
860 break;
861 case 'e':
862 case 'E':
863 case 'f':
864 case 'g':
865 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000866 buf = tmpbuf;
867 len = formatfloat(buf, flags, prec, c, v);
868 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +0000869 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +0000870 sign = 1;
871 if (flags&F_ZERO)
872 fill = '0';
873 break;
874 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000875 buf = tmpbuf;
876 len = formatchar(buf, v);
877 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +0000878 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +0000879 break;
880 default:
881 err_setstr(ValueError,
882 "unsupported format character");
883 goto error;
884 }
885 if (sign) {
886 if (*buf == '-' || *buf == '+') {
887 sign = *buf++;
888 len--;
889 }
890 else if (flags & F_SIGN)
891 sign = '+';
892 else if (flags & F_BLANK)
893 sign = ' ';
894 else
895 sign = '\0';
896 }
897 if (width < len)
898 width = len;
899 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000900 reslen -= rescnt;
901 rescnt = width + fmtcnt + 100;
902 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000903 if (resizestring(&result, reslen) < 0)
904 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000905 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000906 }
907 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000908 if (fill != ' ')
909 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000910 rescnt--;
911 if (width > len)
912 width--;
913 }
914 if (width > len && !(flags&F_LJUST)) {
915 do {
916 --rescnt;
917 *res++ = fill;
918 } while (--width > len);
919 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000920 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000921 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000922 memcpy(res, buf, len);
923 res += len;
924 rescnt -= len;
925 while (--width >= len) {
926 --rescnt;
927 *res++ = ' ';
928 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000929 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000930 err_setstr(TypeError,
931 "not all arguments converted");
932 goto error;
933 }
934 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000935 } /* '%' */
936 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +0000937 if (argidx < arglen && !dict) {
Guido van Rossume5372401993-03-16 12:15:04 +0000938 err_setstr(TypeError, "not all arguments converted");
939 goto error;
940 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000941 if (args_owned)
942 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000943 resizestring(&result, reslen - rescnt);
944 return result;
945 error:
946 DECREF(result);
Guido van Rossum993952b1996-05-21 22:44:20 +0000947 if (args_owned)
948 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000949 return NULL;
950}
Guido van Rossum2a61e741997-01-18 07:55:05 +0000951
952
953#ifdef INTERN_STRINGS
954
955static PyObject *interned;
956
957void
958PyString_InternInPlace(p)
959 PyObject **p;
960{
961 register PyStringObject *s = (PyStringObject *)(*p);
962 PyObject *t;
963 if (s == NULL || !PyString_Check(s))
964 Py_FatalError("PyString_InternInPlace: strings only please!");
965 if ((t = s->ob_sinterned) != NULL) {
966 if (t == (PyObject *)s)
967 return;
968 Py_INCREF(t);
969 *p = t;
970 Py_DECREF(s);
971 return;
972 }
973 if (interned == NULL) {
974 interned = PyDict_New();
975 if (interned == NULL)
976 return;
977 /* Force slow lookups: */
978 PyDict_SetItem(interned, Py_None, Py_None);
979 }
980 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
981 Py_INCREF(t);
982 *p = s->ob_sinterned = t;
983 Py_DECREF(s);
984 return;
985 }
986 t = (PyObject *)s;
987 if (PyDict_SetItem(interned, t, t) == 0) {
988 s->ob_sinterned = t;
989 return;
990 }
991 PyErr_Clear();
992}
993
994
995PyObject *
996PyString_InternFromString(cp)
997 const char *cp;
998{
999 PyObject *s = PyString_FromString(cp);
1000 if (s == NULL)
1001 return NULL;
1002 PyString_InternInPlace(&s);
1003 return s;
1004}
1005
1006#endif