blob: 283e219d34a7d3995c6400eba15cb850bc388e0f [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum013142a1994-08-30 08:19:36 +000036#include <ctype.h>
37
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000038#ifdef COUNT_ALLOCS
39int null_strings, one_strings;
40#endif
41
Guido van Rossum03093a21994-09-28 15:51:32 +000042#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043#include <limits.h>
44#else
45#ifndef UCHAR_MAX
46#define UCHAR_MAX 255
47#endif
48#endif
49
50static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000051#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070object *
71newsizedstringobject(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000072 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073 int size;
74{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000076#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0 && (op = nullstring) != NULL) {
78#ifdef COUNT_ALLOCS
79 null_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
84 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
85#ifdef COUNT_ALLOCS
86 one_strings++;
87#endif
88 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000089 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000091#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000094 if (op == NULL)
95 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000096 op->ob_type = &Stringtype;
97 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098#ifdef CACHE_HASH
99 op->ob_shash = -1;
100#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000101#ifdef INTERN_STRINGS
102 op->ob_sinterned = NULL;
103#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000104 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000105 if (str != NULL)
106 memcpy(op->ob_sval, str, size);
107 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000108#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 if (size == 0) {
110 nullstring = op;
111 INCREF(op);
112 } else if (size == 1 && str != NULL) {
113 characters[*str & UCHAR_MAX] = op;
114 INCREF(op);
115 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000116#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117 return (object *) op;
118}
119
120object *
121newstringobject(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000122 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123{
124 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0 && (op = nullstring) != NULL) {
128#ifdef COUNT_ALLOCS
129 null_strings++;
130#endif
131 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000132 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135#ifdef COUNT_ALLOCS
136 one_strings++;
137#endif
138 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000139 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000141#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000143 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000144 if (op == NULL)
145 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000146 op->ob_type = &Stringtype;
147 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148#ifdef CACHE_HASH
149 op->ob_shash = -1;
150#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000151#ifdef INTERN_STRINGS
152 op->ob_sinterned = NULL;
153#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000154 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000155 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000156#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000157 if (size == 0) {
158 nullstring = op;
159 INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 INCREF(op);
163 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000164#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000165 return (object *) op;
166}
167
Guido van Rossum234f9421993-06-17 12:35:49 +0000168static void
Guido van Rossume5372401993-03-16 12:15:04 +0000169string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000170 object *op;
171{
172 DEL(op);
173}
174
Guido van Rossumd7047b31995-01-02 19:07:15 +0000175int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176getstringsize(op)
177 register object *op;
178{
179 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000180 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181 return -1;
182 }
183 return ((stringobject *)op) -> ob_size;
184}
185
186/*const*/ char *
187getstringvalue(op)
188 register object *op;
189{
190 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000191 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192 return NULL;
193 }
194 return ((stringobject *)op) -> ob_sval;
195}
196
197/* Methods */
198
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000199static int
Guido van Rossume5372401993-03-16 12:15:04 +0000200string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 stringobject *op;
202 FILE *fp;
203 int flags;
204{
205 int i;
206 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000207 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000208 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000209 if (flags & PRINT_RAW) {
210 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000212 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000213
214 /* figure out which quote to use; single is prefered */
215 quote = '\'';
216 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
217 quote = '"';
218
219 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220 for (i = 0; i < op->ob_size; i++) {
221 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000222 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 fprintf(fp, "\\%c", c);
224 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000227 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000228 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000229 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000230 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231}
232
233static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000234string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000235 register stringobject *op;
236{
237 /* XXX overflow? */
238 int newsize = 2 + 4 * op->ob_size * sizeof(char);
239 object *v = newsizedstringobject((char *)NULL, newsize);
240 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000241 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000242 }
243 else {
244 register int i;
245 register char c;
246 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000247 int quote;
248
249 /* figure out which quote to use; single is prefered */
250 quote = '\'';
251 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
252 quote = '"';
253
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 for (i = 0; i < op->ob_size; i++) {
257 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 *p++ = '\\', *p++ = c;
260 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 while (*p != '\0')
263 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 }
265 else
266 *p++ = c;
267 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000268 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269 *p = '\0';
270 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000271 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000273}
274
275static int
Guido van Rossume5372401993-03-16 12:15:04 +0000276string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277 stringobject *a;
278{
279 return a->ob_size;
280}
281
282static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000283string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 register stringobject *a;
285 register object *bb;
286{
287 register unsigned int size;
288 register stringobject *op;
289 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000290 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291 return NULL;
292 }
293#define b ((stringobject *)bb)
294 /* Optimize cases with empty left or right operand */
295 if (a->ob_size == 0) {
296 INCREF(bb);
297 return bb;
298 }
299 if (b->ob_size == 0) {
300 INCREF(a);
301 return (object *)a;
302 }
303 size = a->ob_size + b->ob_size;
304 op = (stringobject *)
305 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000306 if (op == NULL)
307 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000308 op->ob_type = &Stringtype;
309 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000310#ifdef CACHE_HASH
311 op->ob_shash = -1;
312#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000313#ifdef INTERN_STRINGS
314 op->ob_sinterned = NULL;
315#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000316 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000317 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
318 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
319 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000320 return (object *) op;
321#undef b
322}
323
324static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000325string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 register stringobject *a;
327 register int n;
328{
329 register int i;
330 register unsigned int size;
331 register stringobject *op;
332 if (n < 0)
333 n = 0;
334 size = a->ob_size * n;
335 if (size == a->ob_size) {
336 INCREF(a);
337 return (object *)a;
338 }
339 op = (stringobject *)
340 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000341 if (op == NULL)
342 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000343 op->ob_type = &Stringtype;
344 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000345#ifdef CACHE_HASH
346 op->ob_shash = -1;
347#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000348#ifdef INTERN_STRINGS
349 op->ob_sinterned = NULL;
350#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000351 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000352 for (i = 0; i < size; i += a->ob_size)
353 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
354 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355 return (object *) op;
356}
357
358/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
359
360static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000361string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362 register stringobject *a;
363 register int i, j; /* May be negative! */
364{
365 if (i < 0)
366 i = 0;
367 if (j < 0)
368 j = 0; /* Avoid signed/unsigned bug in next line */
369 if (j > a->ob_size)
370 j = a->ob_size;
371 if (i == 0 && j == a->ob_size) { /* It's the same as a */
372 INCREF(a);
373 return (object *)a;
374 }
375 if (j < i)
376 j = i;
377 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
378}
379
380static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000381string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 stringobject *a;
383 register int i;
384{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000385 int c;
386 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000388 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000389 return NULL;
390 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000391 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000392 v = (object *) characters[c];
393#ifdef COUNT_ALLOCS
394 if (v != NULL)
395 one_strings++;
396#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000397 if (v == NULL) {
398 v = newsizedstringobject((char *)NULL, 1);
399 if (v == NULL)
400 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000401 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000402 ((stringobject *)v)->ob_sval[0] = c;
403 }
404 INCREF(v);
405 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406}
407
408static int
Guido van Rossume5372401993-03-16 12:15:04 +0000409string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 stringobject *a, *b;
411{
Guido van Rossum253919f1991-02-13 23:18:39 +0000412 int len_a = a->ob_size, len_b = b->ob_size;
413 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000414 int cmp;
415 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000416 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000417 if (cmp == 0)
418 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
419 if (cmp != 0)
420 return cmp;
421 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000422 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423}
424
Guido van Rossum9bfef441993-03-29 10:43:31 +0000425static long
426string_hash(a)
427 stringobject *a;
428{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000429 register int len;
430 register unsigned char *p;
431 register long x;
432
433#ifdef CACHE_HASH
434 if (a->ob_shash != -1)
435 return a->ob_shash;
436#endif
437 len = a->ob_size;
438 p = (unsigned char *) a->ob_sval;
439 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000440 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000441 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000442 x ^= a->ob_size;
443 if (x == -1)
444 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000445#ifdef CACHE_HASH
446 a->ob_shash = x;
447#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000448 return x;
449}
450
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000452 (inquiry)string_length, /*sq_length*/
453 (binaryfunc)string_concat, /*sq_concat*/
454 (intargfunc)string_repeat, /*sq_repeat*/
455 (intargfunc)string_item, /*sq_item*/
456 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000457 0, /*sq_ass_item*/
458 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459};
460
461typeobject Stringtype = {
462 OB_HEAD_INIT(&Typetype)
463 0,
464 "string",
465 sizeof(stringobject),
466 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000467 (destructor)string_dealloc, /*tp_dealloc*/
468 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469 0, /*tp_getattr*/
470 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000471 (cmpfunc)string_compare, /*tp_compare*/
472 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 0, /*tp_as_number*/
474 &string_as_sequence, /*tp_as_sequence*/
475 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000476 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +0000477 0, /*tp_call*/
478 0, /*tp_str*/
479 0, /*tp_getattro*/
480 0, /*tp_setattro*/
481 0, /*tp_xxx3*/
482 0, /*tp_xxx4*/
483 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000484};
485
486void
487joinstring(pv, w)
488 register object **pv;
489 register object *w;
490{
491 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000492 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000494 if (w == NULL || !is_stringobject(*pv)) {
495 DECREF(*pv);
496 *pv = NULL;
497 return;
498 }
Guido van Rossume5372401993-03-16 12:15:04 +0000499 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000500 DECREF(*pv);
501 *pv = v;
502}
503
Guido van Rossum013142a1994-08-30 08:19:36 +0000504void
505joinstring_decref(pv, w)
506 register object **pv;
507 register object *w;
508{
509 joinstring(pv, w);
510 XDECREF(w);
511}
512
513
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514/* The following function breaks the notion that strings are immutable:
515 it changes the size of a string. We get away with this only if there
516 is only one module referencing the object. You can also think of it
517 as creating a new string object and destroying the old one, only
518 more efficiently. In any case, don't use this if the string may
519 already be known to some other part of the code... */
520
521int
522resizestring(pv, newsize)
523 object **pv;
524 int newsize;
525{
Guido van Rossum921842f1990-11-18 17:30:23 +0000526 register object *v;
527 register stringobject *sv;
528 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 if (!is_stringobject(v) || v->ob_refcnt != 1) {
530 *pv = 0;
531 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000532 err_badcall();
533 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000535 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +0000536#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +0000537 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +0000538#endif
539 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 *pv = (object *)
541 realloc((char *)v,
542 sizeof(stringobject) + newsize * sizeof(char));
543 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000544 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000545 err_nomem();
546 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000547 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000548 NEWREF(*pv);
549 sv = (stringobject *) *pv;
550 sv->ob_size = newsize;
551 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000552 return 0;
553}
Guido van Rossume5372401993-03-16 12:15:04 +0000554
555/* Helpers for formatstring */
556
557static object *
558getnextarg(args, arglen, p_argidx)
559 object *args;
560 int arglen;
561 int *p_argidx;
562{
563 int argidx = *p_argidx;
564 if (argidx < arglen) {
565 (*p_argidx)++;
566 if (arglen < 0)
567 return args;
568 else
569 return gettupleitem(args, argidx);
570 }
571 err_setstr(TypeError, "not enough arguments for format string");
572 return NULL;
573}
574
/* Bit flags recording which flag characters appeared in a format
   specifier. */
#define F_LJUST (1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK (1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
580
581extern double fabs PROTO((double));
582
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000583static int
584formatfloat(buf, flags, prec, type, v)
585 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +0000586 int flags;
587 int prec;
588 int type;
589 object *v;
590{
591 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +0000592 double x;
593 if (!getargs(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000594 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000595 if (prec < 0)
596 prec = 6;
597 if (prec > 50)
598 prec = 50; /* Arbitrary limitation */
599 if (type == 'f' && fabs(x)/1e25 >= 1e25)
600 type = 'g';
601 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
602 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000603 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +0000604}
605
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000606static int
607formatint(buf, flags, prec, type, v)
608 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +0000609 int flags;
610 int prec;
611 int type;
612 object *v;
613{
614 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +0000615 long x;
616 if (!getargs(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000617 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000618 if (prec < 0)
619 prec = 1;
620 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
621 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000622 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +0000623}
624
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000625static int
626formatchar(buf, v)
627 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +0000628 object *v;
629{
Guido van Rossume5372401993-03-16 12:15:04 +0000630 if (is_stringobject(v)) {
631 if (!getargs(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000632 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000633 }
634 else {
635 if (!getargs(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000636 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +0000637 }
638 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000639 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000640}
641
Guido van Rossum013142a1994-08-30 08:19:36 +0000642
Guido van Rossume5372401993-03-16 12:15:04 +0000643/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
644
645object *
646formatstring(format, args)
647 object *format;
648 object *args;
649{
650 char *fmt, *res;
651 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +0000652 int args_owned = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000653 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000654 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000655 if (format == NULL || !is_stringobject(format) || args == NULL) {
656 err_badcall();
657 return NULL;
658 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000659 fmt = getstringvalue(format);
660 fmtcnt = getstringsize(format);
661 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000662 result = newsizedstringobject((char *)NULL, reslen);
663 if (result == NULL)
664 return NULL;
665 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000666 if (is_tupleobject(args)) {
667 arglen = gettuplesize(args);
668 argidx = 0;
669 }
670 else {
671 arglen = -1;
672 argidx = -2;
673 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000674 if (args->ob_type->tp_as_mapping)
675 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000676 while (--fmtcnt >= 0) {
677 if (*fmt != '%') {
678 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000679 rescnt = fmtcnt + 100;
680 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000681 if (resizestring(&result, reslen) < 0)
682 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000683 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000684 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000685 }
686 *res++ = *fmt++;
687 }
688 else {
689 /* Got a format specifier */
690 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000691 int width = -1;
692 int prec = -1;
693 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000694 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000695 int fill;
Guido van Rossumda9c2711996-12-05 21:58:58 +0000696 object *v = NULL;
Guido van Rossum013142a1994-08-30 08:19:36 +0000697 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000698 char *buf;
699 int sign;
700 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000701 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossumda9c2711996-12-05 21:58:58 +0000702 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +0000703 if (*fmt == '(') {
704 char *keystart;
705 int keylen;
706 object *key;
707
708 if (dict == NULL) {
709 err_setstr(TypeError,
710 "format requires a mapping");
711 goto error;
712 }
713 ++fmt;
714 --fmtcnt;
715 keystart = fmt;
716 while (--fmtcnt >= 0 && *fmt != ')')
717 fmt++;
718 keylen = fmt - keystart;
719 ++fmt;
720 if (fmtcnt < 0) {
721 err_setstr(ValueError,
722 "incomplete format key");
723 goto error;
724 }
725 key = newsizedstringobject(keystart, keylen);
726 if (key == NULL)
727 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +0000728 if (args_owned) {
729 DECREF(args);
730 args_owned = 0;
731 }
732 args = PyObject_GetItem(dict, key);
Guido van Rossum013142a1994-08-30 08:19:36 +0000733 DECREF(key);
734 if (args == NULL) {
735 goto error;
736 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000737 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +0000738 arglen = -1;
739 argidx = -2;
740 }
Guido van Rossume5372401993-03-16 12:15:04 +0000741 while (--fmtcnt >= 0) {
742 switch (c = *fmt++) {
743 case '-': flags |= F_LJUST; continue;
744 case '+': flags |= F_SIGN; continue;
745 case ' ': flags |= F_BLANK; continue;
746 case '#': flags |= F_ALT; continue;
747 case '0': flags |= F_ZERO; continue;
748 }
749 break;
750 }
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!is_intobject(v)) {
756 err_setstr(TypeError, "* wants int");
757 goto error;
758 }
759 width = getintvalue(v);
760 if (width < 0)
761 width = 0;
762 if (--fmtcnt >= 0)
763 c = *fmt++;
764 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000765 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000766 width = c - '0';
767 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000768 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000769 if (!isdigit(c))
770 break;
771 if ((width*10) / 10 != width) {
772 err_setstr(ValueError,
773 "width too big");
774 goto error;
775 }
776 width = width*10 + (c - '0');
777 }
778 }
779 if (c == '.') {
780 prec = 0;
781 if (--fmtcnt >= 0)
782 c = *fmt++;
783 if (c == '*') {
784 v = getnextarg(args, arglen, &argidx);
785 if (v == NULL)
786 goto error;
787 if (!is_intobject(v)) {
788 err_setstr(TypeError,
789 "* wants int");
790 goto error;
791 }
792 prec = getintvalue(v);
793 if (prec < 0)
794 prec = 0;
795 if (--fmtcnt >= 0)
796 c = *fmt++;
797 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000798 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000799 prec = c - '0';
800 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000801 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000802 if (!isdigit(c))
803 break;
804 if ((prec*10) / 10 != prec) {
805 err_setstr(ValueError,
806 "prec too big");
807 goto error;
808 }
809 prec = prec*10 + (c - '0');
810 }
811 }
812 } /* prec */
813 if (fmtcnt >= 0) {
814 if (c == 'h' || c == 'l' || c == 'L') {
815 size = c;
816 if (--fmtcnt >= 0)
817 c = *fmt++;
818 }
819 }
820 if (fmtcnt < 0) {
821 err_setstr(ValueError, "incomplete format");
822 goto error;
823 }
824 if (c != '%') {
825 v = getnextarg(args, arglen, &argidx);
826 if (v == NULL)
827 goto error;
828 }
829 sign = 0;
830 fill = ' ';
831 switch (c) {
832 case '%':
833 buf = "%";
834 len = 1;
835 break;
836 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000837 temp = strobject(v);
838 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000839 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000840 buf = getstringvalue(temp);
841 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000842 if (prec >= 0 && len > prec)
843 len = prec;
844 break;
845 case 'i':
846 case 'd':
847 case 'u':
848 case 'o':
849 case 'x':
850 case 'X':
851 if (c == 'i')
852 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000853 buf = tmpbuf;
854 len = formatint(buf, flags, prec, c, v);
855 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +0000856 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +0000857 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +0000858 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +0000859 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +0000860 if ((flags&F_ALT) &&
861 (c == 'x' || c == 'X') &&
862 buf[0] == '0' && buf[1] == c) {
863 *res++ = *buf++;
864 *res++ = *buf++;
865 rescnt -= 2;
866 len -= 2;
867 width -= 2;
868 if (width < 0)
869 width = 0;
870 }
871 }
Guido van Rossume5372401993-03-16 12:15:04 +0000872 break;
873 case 'e':
874 case 'E':
875 case 'f':
876 case 'g':
877 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000878 buf = tmpbuf;
879 len = formatfloat(buf, flags, prec, c, v);
880 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +0000881 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +0000882 sign = 1;
883 if (flags&F_ZERO)
884 fill = '0';
885 break;
886 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +0000887 buf = tmpbuf;
888 len = formatchar(buf, v);
889 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +0000890 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +0000891 break;
892 default:
893 err_setstr(ValueError,
894 "unsupported format character");
895 goto error;
896 }
897 if (sign) {
898 if (*buf == '-' || *buf == '+') {
899 sign = *buf++;
900 len--;
901 }
902 else if (flags & F_SIGN)
903 sign = '+';
904 else if (flags & F_BLANK)
905 sign = ' ';
906 else
907 sign = '\0';
908 }
909 if (width < len)
910 width = len;
911 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000912 reslen -= rescnt;
913 rescnt = width + fmtcnt + 100;
914 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000915 if (resizestring(&result, reslen) < 0)
916 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000917 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000918 }
919 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000920 if (fill != ' ')
921 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000922 rescnt--;
923 if (width > len)
924 width--;
925 }
926 if (width > len && !(flags&F_LJUST)) {
927 do {
928 --rescnt;
929 *res++ = fill;
930 } while (--width > len);
931 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000932 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000933 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000934 memcpy(res, buf, len);
935 res += len;
936 rescnt -= len;
937 while (--width >= len) {
938 --rescnt;
939 *res++ = ' ';
940 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000941 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000942 err_setstr(TypeError,
943 "not all arguments converted");
944 goto error;
945 }
946 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000947 } /* '%' */
948 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +0000949 if (argidx < arglen && !dict) {
Guido van Rossume5372401993-03-16 12:15:04 +0000950 err_setstr(TypeError, "not all arguments converted");
951 goto error;
952 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000953 if (args_owned)
954 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000955 resizestring(&result, reslen - rescnt);
956 return result;
957 error:
958 DECREF(result);
Guido van Rossum993952b1996-05-21 22:44:20 +0000959 if (args_owned)
960 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000961 return NULL;
962}
Guido van Rossum2a61e741997-01-18 07:55:05 +0000963
964
965#ifdef INTERN_STRINGS
966
967static PyObject *interned;
968
969void
970PyString_InternInPlace(p)
971 PyObject **p;
972{
973 register PyStringObject *s = (PyStringObject *)(*p);
974 PyObject *t;
975 if (s == NULL || !PyString_Check(s))
976 Py_FatalError("PyString_InternInPlace: strings only please!");
977 if ((t = s->ob_sinterned) != NULL) {
978 if (t == (PyObject *)s)
979 return;
980 Py_INCREF(t);
981 *p = t;
982 Py_DECREF(s);
983 return;
984 }
985 if (interned == NULL) {
986 interned = PyDict_New();
987 if (interned == NULL)
988 return;
989 /* Force slow lookups: */
990 PyDict_SetItem(interned, Py_None, Py_None);
991 }
992 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
993 Py_INCREF(t);
994 *p = s->ob_sinterned = t;
995 Py_DECREF(s);
996 return;
997 }
998 t = (PyObject *)s;
999 if (PyDict_SetItem(interned, t, t) == 0) {
1000 s->ob_sinterned = t;
1001 return;
1002 }
1003 PyErr_Clear();
1004}
1005
1006
1007PyObject *
1008PyString_InternFromString(cp)
1009 const char *cp;
1010{
1011 PyObject *s = PyString_FromString(cp);
1012 if (s == NULL)
1013 return NULL;
1014 PyString_InternInPlace(&s);
1015 return s;
1016}
1017
1018#endif