blob: d656fa1006b0f5a17853079d415f086d49afbe5d [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossum3f5da241990-12-20 15:06:42 +000034#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum013142a1994-08-30 08:19:36 +000036#include <ctype.h>
37
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000038#ifdef COUNT_ALLOCS
39int null_strings, one_strings;
40#endif
41
Guido van Rossum03093a21994-09-28 15:51:32 +000042#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043#include <limits.h>
44#else
45#ifndef UCHAR_MAX
46#define UCHAR_MAX 255
47#endif
48#endif
49
50static stringobject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000051#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000052static stringobject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000053#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070object *
71newsizedstringobject(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000072 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000073 int size;
74{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000076#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0 && (op = nullstring) != NULL) {
78#ifdef COUNT_ALLOCS
79 null_strings++;
80#endif
81 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000082 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 }
84 if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) {
85#ifdef COUNT_ALLOCS
86 one_strings++;
87#endif
88 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +000089 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000091#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000093 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000094 if (op == NULL)
95 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +000096 op->ob_type = &Stringtype;
97 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098#ifdef CACHE_HASH
99 op->ob_shash = -1;
100#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000101#ifdef INTERN_STRINGS
102 op->ob_sinterned = NULL;
103#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000104 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000105 if (str != NULL)
106 memcpy(op->ob_sval, str, size);
107 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000108#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 if (size == 0) {
110 nullstring = op;
111 INCREF(op);
112 } else if (size == 1 && str != NULL) {
113 characters[*str & UCHAR_MAX] = op;
114 INCREF(op);
115 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000116#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000117 return (object *) op;
118}
119
120object *
121newstringobject(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000122 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123{
124 register unsigned int size = strlen(str);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 register stringobject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0 && (op = nullstring) != NULL) {
128#ifdef COUNT_ALLOCS
129 null_strings++;
130#endif
131 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000132 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135#ifdef COUNT_ALLOCS
136 one_strings++;
137#endif
138 INCREF(op);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000139 return (object *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000140 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000141#endif /* DONT_SHARE_SHORT_STRINGS */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 op = (stringobject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000143 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000144 if (op == NULL)
145 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000146 op->ob_type = &Stringtype;
147 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148#ifdef CACHE_HASH
149 op->ob_shash = -1;
150#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000151#ifdef INTERN_STRINGS
152 op->ob_sinterned = NULL;
153#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000154 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000155 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000156#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000157 if (size == 0) {
158 nullstring = op;
159 INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 INCREF(op);
163 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000164#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000165 return (object *) op;
166}
167
Guido van Rossum234f9421993-06-17 12:35:49 +0000168static void
Guido van Rossume5372401993-03-16 12:15:04 +0000169string_dealloc(op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000170 object *op;
171{
172 DEL(op);
173}
174
Guido van Rossumd7047b31995-01-02 19:07:15 +0000175int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176getstringsize(op)
177 register object *op;
178{
179 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000180 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181 return -1;
182 }
183 return ((stringobject *)op) -> ob_size;
184}
185
186/*const*/ char *
187getstringvalue(op)
188 register object *op;
189{
190 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000191 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192 return NULL;
193 }
194 return ((stringobject *)op) -> ob_sval;
195}
196
197/* Methods */
198
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000199static int
Guido van Rossume5372401993-03-16 12:15:04 +0000200string_print(op, fp, flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000201 stringobject *op;
202 FILE *fp;
203 int flags;
204{
205 int i;
206 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000207 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000208 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000209 if (flags & PRINT_RAW) {
210 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000212 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000213
214 /* figure out which quote to use; single is prefered */
215 quote = '\'';
216 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
217 quote = '"';
218
219 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220 for (i = 0; i < op->ob_size; i++) {
221 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000222 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 fprintf(fp, "\\%c", c);
224 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000227 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000228 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000229 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000230 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231}
232
233static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000234string_repr(op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000235 register stringobject *op;
236{
237 /* XXX overflow? */
238 int newsize = 2 + 4 * op->ob_size * sizeof(char);
239 object *v = newsizedstringobject((char *)NULL, newsize);
240 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000241 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000242 }
243 else {
244 register int i;
245 register char c;
246 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000247 int quote;
248
249 /* figure out which quote to use; single is prefered */
250 quote = '\'';
251 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
252 quote = '"';
253
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254 p = ((stringobject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 for (i = 0; i < op->ob_size; i++) {
257 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 *p++ = '\\', *p++ = c;
260 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 while (*p != '\0')
263 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 }
265 else
266 *p++ = c;
267 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000268 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000269 *p = '\0';
270 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000271 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000273}
274
275static int
Guido van Rossume5372401993-03-16 12:15:04 +0000276string_length(a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277 stringobject *a;
278{
279 return a->ob_size;
280}
281
282static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000283string_concat(a, bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 register stringobject *a;
285 register object *bb;
286{
287 register unsigned int size;
288 register stringobject *op;
289 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000290 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291 return NULL;
292 }
293#define b ((stringobject *)bb)
294 /* Optimize cases with empty left or right operand */
295 if (a->ob_size == 0) {
296 INCREF(bb);
297 return bb;
298 }
299 if (b->ob_size == 0) {
300 INCREF(a);
301 return (object *)a;
302 }
303 size = a->ob_size + b->ob_size;
304 op = (stringobject *)
305 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000306 if (op == NULL)
307 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000308 op->ob_type = &Stringtype;
309 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000310#ifdef CACHE_HASH
311 op->ob_shash = -1;
312#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000313#ifdef INTERN_STRINGS
314 op->ob_sinterned = NULL;
315#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000316 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000317 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
318 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
319 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000320 return (object *) op;
321#undef b
322}
323
324static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000325string_repeat(a, n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 register stringobject *a;
327 register int n;
328{
329 register int i;
330 register unsigned int size;
331 register stringobject *op;
332 if (n < 0)
333 n = 0;
334 size = a->ob_size * n;
335 if (size == a->ob_size) {
336 INCREF(a);
337 return (object *)a;
338 }
339 op = (stringobject *)
340 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000341 if (op == NULL)
342 return err_nomem();
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000343 op->ob_type = &Stringtype;
344 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000345#ifdef CACHE_HASH
346 op->ob_shash = -1;
347#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000348#ifdef INTERN_STRINGS
349 op->ob_sinterned = NULL;
350#endif
Sjoerd Mullendera9c3c221993-10-11 12:54:31 +0000351 NEWREF(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000352 for (i = 0; i < size; i += a->ob_size)
353 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
354 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355 return (object *) op;
356}
357
358/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
359
360static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000361string_slice(a, i, j)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362 register stringobject *a;
363 register int i, j; /* May be negative! */
364{
365 if (i < 0)
366 i = 0;
367 if (j < 0)
368 j = 0; /* Avoid signed/unsigned bug in next line */
369 if (j > a->ob_size)
370 j = a->ob_size;
371 if (i == 0 && j == a->ob_size) { /* It's the same as a */
372 INCREF(a);
373 return (object *)a;
374 }
375 if (j < i)
376 j = i;
377 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
378}
379
380static object *
Guido van Rossume5372401993-03-16 12:15:04 +0000381string_item(a, i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 stringobject *a;
383 register int i;
384{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000385 int c;
386 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000388 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000389 return NULL;
390 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000391 c = a->ob_sval[i] & UCHAR_MAX;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000392 v = (object *) characters[c];
393#ifdef COUNT_ALLOCS
394 if (v != NULL)
395 one_strings++;
396#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000397 if (v == NULL) {
398 v = newsizedstringobject((char *)NULL, 1);
399 if (v == NULL)
400 return NULL;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000401 characters[c] = (stringobject *) v;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000402 ((stringobject *)v)->ob_sval[0] = c;
403 }
404 INCREF(v);
405 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406}
407
408static int
Guido van Rossume5372401993-03-16 12:15:04 +0000409string_compare(a, b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 stringobject *a, *b;
411{
Guido van Rossum253919f1991-02-13 23:18:39 +0000412 int len_a = a->ob_size, len_b = b->ob_size;
413 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000414 int cmp;
415 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000416 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000417 if (cmp == 0)
418 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
419 if (cmp != 0)
420 return cmp;
421 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000422 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423}
424
Guido van Rossum9bfef441993-03-29 10:43:31 +0000425static long
426string_hash(a)
427 stringobject *a;
428{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000429 register int len;
430 register unsigned char *p;
431 register long x;
432
433#ifdef CACHE_HASH
434 if (a->ob_shash != -1)
435 return a->ob_shash;
436#endif
437 len = a->ob_size;
438 p = (unsigned char *) a->ob_sval;
439 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000440 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000441 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000442 x ^= a->ob_size;
443 if (x == -1)
444 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000445#ifdef CACHE_HASH
446 a->ob_shash = x;
447#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000448 return x;
449}
450
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451static sequence_methods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000452 (inquiry)string_length, /*sq_length*/
453 (binaryfunc)string_concat, /*sq_concat*/
454 (intargfunc)string_repeat, /*sq_repeat*/
455 (intargfunc)string_item, /*sq_item*/
456 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000457 0, /*sq_ass_item*/
458 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459};
460
461typeobject Stringtype = {
462 OB_HEAD_INIT(&Typetype)
463 0,
464 "string",
465 sizeof(stringobject),
466 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +0000467 (destructor)string_dealloc, /*tp_dealloc*/
468 (printfunc)string_print, /*tp_print*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469 0, /*tp_getattr*/
470 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000471 (cmpfunc)string_compare, /*tp_compare*/
472 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 0, /*tp_as_number*/
474 &string_as_sequence, /*tp_as_sequence*/
475 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +0000476 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +0000477 0, /*tp_call*/
478 0, /*tp_str*/
479 0, /*tp_getattro*/
480 0, /*tp_setattro*/
481 0, /*tp_xxx3*/
482 0, /*tp_xxx4*/
483 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000484};
485
486void
487joinstring(pv, w)
488 register object **pv;
489 register object *w;
490{
491 register object *v;
Guido van Rossum013142a1994-08-30 08:19:36 +0000492 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 return;
Guido van Rossum013142a1994-08-30 08:19:36 +0000494 if (w == NULL || !is_stringobject(*pv)) {
495 DECREF(*pv);
496 *pv = NULL;
497 return;
498 }
Guido van Rossume5372401993-03-16 12:15:04 +0000499 v = string_concat((stringobject *) *pv, w);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000500 DECREF(*pv);
501 *pv = v;
502}
503
Guido van Rossum013142a1994-08-30 08:19:36 +0000504void
505joinstring_decref(pv, w)
506 register object **pv;
507 register object *w;
508{
509 joinstring(pv, w);
510 XDECREF(w);
511}
512
513
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514/* The following function breaks the notion that strings are immutable:
515 it changes the size of a string. We get away with this only if there
516 is only one module referencing the object. You can also think of it
517 as creating a new string object and destroying the old one, only
518 more efficiently. In any case, don't use this if the string may
519 already be known to some other part of the code... */
520
521int
522resizestring(pv, newsize)
523 object **pv;
524 int newsize;
525{
Guido van Rossum921842f1990-11-18 17:30:23 +0000526 register object *v;
527 register stringobject *sv;
528 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 if (!is_stringobject(v) || v->ob_refcnt != 1) {
530 *pv = 0;
531 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000532 err_badcall();
533 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000535 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +0000536#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +0000537 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +0000538#endif
539 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 *pv = (object *)
541 realloc((char *)v,
542 sizeof(stringobject) + newsize * sizeof(char));
543 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000544 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000545 err_nomem();
546 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000547 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000548 NEWREF(*pv);
549 sv = (stringobject *) *pv;
550 sv->ob_size = newsize;
551 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000552 return 0;
553}
Guido van Rossume5372401993-03-16 12:15:04 +0000554
555/* Helpers for formatstring */
556
557static object *
558getnextarg(args, arglen, p_argidx)
559 object *args;
560 int arglen;
561 int *p_argidx;
562{
563 int argidx = *p_argidx;
564 if (argidx < arglen) {
565 (*p_argidx)++;
566 if (arglen < 0)
567 return args;
568 else
569 return gettupleitem(args, argidx);
570 }
571 err_setstr(TypeError, "not enough arguments for format string");
572 return NULL;
573}
574
/* Bit flags recording which flag characters ('-', '+', ' ', '#', '0')
   were seen in a format specifier. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
580
581extern double fabs PROTO((double));
582
583static char *
584formatfloat(flags, prec, type, v)
585 int flags;
586 int prec;
587 int type;
588 object *v;
589{
590 char fmt[20];
591 static char buf[120];
592 double x;
593 if (!getargs(v, "d;float argument required", &x))
594 return NULL;
595 if (prec < 0)
596 prec = 6;
597 if (prec > 50)
598 prec = 50; /* Arbitrary limitation */
599 if (type == 'f' && fabs(x)/1e25 >= 1e25)
600 type = 'g';
601 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
602 sprintf(buf, fmt, x);
603 return buf;
604}
605
606static char *
607formatint(flags, prec, type, v)
608 int flags;
609 int prec;
610 int type;
611 object *v;
612{
613 char fmt[20];
614 static char buf[50];
615 long x;
616 if (!getargs(v, "l;int argument required", &x))
617 return NULL;
618 if (prec < 0)
619 prec = 1;
620 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
621 sprintf(buf, fmt, x);
622 return buf;
623}
624
625static char *
626formatchar(v)
627 object *v;
628{
629 static char buf[2];
630 if (is_stringobject(v)) {
631 if (!getargs(v, "c;%c requires int or char", &buf[0]))
632 return NULL;
633 }
634 else {
635 if (!getargs(v, "b;%c requires int or char", &buf[0]))
636 return NULL;
637 }
638 buf[1] = '\0';
639 return buf;
640}
641
Guido van Rossum013142a1994-08-30 08:19:36 +0000642
Guido van Rossume5372401993-03-16 12:15:04 +0000643/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
644
645object *
646formatstring(format, args)
647 object *format;
648 object *args;
649{
650 char *fmt, *res;
651 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +0000652 int args_owned = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000653 object *result;
Guido van Rossum013142a1994-08-30 08:19:36 +0000654 object *dict = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000655 if (format == NULL || !is_stringobject(format) || args == NULL) {
656 err_badcall();
657 return NULL;
658 }
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000659 fmt = getstringvalue(format);
660 fmtcnt = getstringsize(format);
661 reslen = rescnt = fmtcnt + 100;
Guido van Rossume5372401993-03-16 12:15:04 +0000662 result = newsizedstringobject((char *)NULL, reslen);
663 if (result == NULL)
664 return NULL;
665 res = getstringvalue(result);
Guido van Rossume5372401993-03-16 12:15:04 +0000666 if (is_tupleobject(args)) {
667 arglen = gettuplesize(args);
668 argidx = 0;
669 }
670 else {
671 arglen = -1;
672 argidx = -2;
673 }
Guido van Rossum013142a1994-08-30 08:19:36 +0000674 if (args->ob_type->tp_as_mapping)
675 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +0000676 while (--fmtcnt >= 0) {
677 if (*fmt != '%') {
678 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000679 rescnt = fmtcnt + 100;
680 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000681 if (resizestring(&result, reslen) < 0)
682 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000683 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +0000684 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000685 }
686 *res++ = *fmt++;
687 }
688 else {
689 /* Got a format specifier */
690 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +0000691 int width = -1;
692 int prec = -1;
693 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +0000694 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +0000695 int fill;
Guido van Rossumda9c2711996-12-05 21:58:58 +0000696 object *v = NULL;
Guido van Rossum013142a1994-08-30 08:19:36 +0000697 object *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +0000698 char *buf;
699 int sign;
700 int len;
Guido van Rossumda9c2711996-12-05 21:58:58 +0000701 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +0000702 if (*fmt == '(') {
703 char *keystart;
704 int keylen;
705 object *key;
706
707 if (dict == NULL) {
708 err_setstr(TypeError,
709 "format requires a mapping");
710 goto error;
711 }
712 ++fmt;
713 --fmtcnt;
714 keystart = fmt;
715 while (--fmtcnt >= 0 && *fmt != ')')
716 fmt++;
717 keylen = fmt - keystart;
718 ++fmt;
719 if (fmtcnt < 0) {
720 err_setstr(ValueError,
721 "incomplete format key");
722 goto error;
723 }
724 key = newsizedstringobject(keystart, keylen);
725 if (key == NULL)
726 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +0000727 if (args_owned) {
728 DECREF(args);
729 args_owned = 0;
730 }
731 args = PyObject_GetItem(dict, key);
Guido van Rossum013142a1994-08-30 08:19:36 +0000732 DECREF(key);
733 if (args == NULL) {
734 goto error;
735 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000736 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +0000737 arglen = -1;
738 argidx = -2;
739 }
Guido van Rossume5372401993-03-16 12:15:04 +0000740 while (--fmtcnt >= 0) {
741 switch (c = *fmt++) {
742 case '-': flags |= F_LJUST; continue;
743 case '+': flags |= F_SIGN; continue;
744 case ' ': flags |= F_BLANK; continue;
745 case '#': flags |= F_ALT; continue;
746 case '0': flags |= F_ZERO; continue;
747 }
748 break;
749 }
750 if (c == '*') {
751 v = getnextarg(args, arglen, &argidx);
752 if (v == NULL)
753 goto error;
754 if (!is_intobject(v)) {
755 err_setstr(TypeError, "* wants int");
756 goto error;
757 }
758 width = getintvalue(v);
759 if (width < 0)
760 width = 0;
761 if (--fmtcnt >= 0)
762 c = *fmt++;
763 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000764 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000765 width = c - '0';
766 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000767 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000768 if (!isdigit(c))
769 break;
770 if ((width*10) / 10 != width) {
771 err_setstr(ValueError,
772 "width too big");
773 goto error;
774 }
775 width = width*10 + (c - '0');
776 }
777 }
778 if (c == '.') {
779 prec = 0;
780 if (--fmtcnt >= 0)
781 c = *fmt++;
782 if (c == '*') {
783 v = getnextarg(args, arglen, &argidx);
784 if (v == NULL)
785 goto error;
786 if (!is_intobject(v)) {
787 err_setstr(TypeError,
788 "* wants int");
789 goto error;
790 }
791 prec = getintvalue(v);
792 if (prec < 0)
793 prec = 0;
794 if (--fmtcnt >= 0)
795 c = *fmt++;
796 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000797 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +0000798 prec = c - '0';
799 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000800 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +0000801 if (!isdigit(c))
802 break;
803 if ((prec*10) / 10 != prec) {
804 err_setstr(ValueError,
805 "prec too big");
806 goto error;
807 }
808 prec = prec*10 + (c - '0');
809 }
810 }
811 } /* prec */
812 if (fmtcnt >= 0) {
813 if (c == 'h' || c == 'l' || c == 'L') {
814 size = c;
815 if (--fmtcnt >= 0)
816 c = *fmt++;
817 }
818 }
819 if (fmtcnt < 0) {
820 err_setstr(ValueError, "incomplete format");
821 goto error;
822 }
823 if (c != '%') {
824 v = getnextarg(args, arglen, &argidx);
825 if (v == NULL)
826 goto error;
827 }
828 sign = 0;
829 fill = ' ';
830 switch (c) {
831 case '%':
832 buf = "%";
833 len = 1;
834 break;
835 case 's':
Guido van Rossum013142a1994-08-30 08:19:36 +0000836 temp = strobject(v);
837 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +0000838 goto error;
Guido van Rossum013142a1994-08-30 08:19:36 +0000839 buf = getstringvalue(temp);
840 len = getstringsize(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000841 if (prec >= 0 && len > prec)
842 len = prec;
843 break;
844 case 'i':
845 case 'd':
846 case 'u':
847 case 'o':
848 case 'x':
849 case 'X':
850 if (c == 'i')
851 c = 'd';
852 buf = formatint(flags, prec, c, v);
853 if (buf == NULL)
854 goto error;
855 len = strlen(buf);
856 sign = (c == 'd');
857 if (flags&F_ZERO)
858 fill = '0';
859 break;
860 case 'e':
861 case 'E':
862 case 'f':
863 case 'g':
864 case 'G':
865 buf = formatfloat(flags, prec, c, v);
866 if (buf == NULL)
867 goto error;
868 len = strlen(buf);
869 sign = 1;
870 if (flags&F_ZERO)
871 fill = '0';
872 break;
873 case 'c':
874 buf = formatchar(v);
875 if (buf == NULL)
876 goto error;
Guido van Rossum6938a291993-11-11 14:51:57 +0000877 len = 1;
Guido van Rossume5372401993-03-16 12:15:04 +0000878 break;
879 default:
880 err_setstr(ValueError,
881 "unsupported format character");
882 goto error;
883 }
884 if (sign) {
885 if (*buf == '-' || *buf == '+') {
886 sign = *buf++;
887 len--;
888 }
889 else if (flags & F_SIGN)
890 sign = '+';
891 else if (flags & F_BLANK)
892 sign = ' ';
893 else
894 sign = '\0';
895 }
896 if (width < len)
897 width = len;
898 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000899 reslen -= rescnt;
900 rescnt = width + fmtcnt + 100;
901 reslen += rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000902 if (resizestring(&result, reslen) < 0)
903 return NULL;
Guido van Rossum6ac258d1993-05-12 08:24:20 +0000904 res = getstringvalue(result) + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +0000905 }
906 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +0000907 if (fill != ' ')
908 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000909 rescnt--;
910 if (width > len)
911 width--;
912 }
913 if (width > len && !(flags&F_LJUST)) {
914 do {
915 --rescnt;
916 *res++ = fill;
917 } while (--width > len);
918 }
Guido van Rossum71e57d01993-11-11 15:03:51 +0000919 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +0000920 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +0000921 memcpy(res, buf, len);
922 res += len;
923 rescnt -= len;
924 while (--width >= len) {
925 --rescnt;
926 *res++ = ' ';
927 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +0000928 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossum013142a1994-08-30 08:19:36 +0000929 err_setstr(TypeError,
930 "not all arguments converted");
931 goto error;
932 }
933 XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +0000934 } /* '%' */
935 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +0000936 if (argidx < arglen && !dict) {
Guido van Rossume5372401993-03-16 12:15:04 +0000937 err_setstr(TypeError, "not all arguments converted");
938 goto error;
939 }
Guido van Rossum993952b1996-05-21 22:44:20 +0000940 if (args_owned)
941 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000942 resizestring(&result, reslen - rescnt);
943 return result;
944 error:
945 DECREF(result);
Guido van Rossum993952b1996-05-21 22:44:20 +0000946 if (args_owned)
947 DECREF(args);
Guido van Rossume5372401993-03-16 12:15:04 +0000948 return NULL;
949}
Guido van Rossum2a61e741997-01-18 07:55:05 +0000950
951
952#ifdef INTERN_STRINGS
953
954static PyObject *interned;
955
956void
957PyString_InternInPlace(p)
958 PyObject **p;
959{
960 register PyStringObject *s = (PyStringObject *)(*p);
961 PyObject *t;
962 if (s == NULL || !PyString_Check(s))
963 Py_FatalError("PyString_InternInPlace: strings only please!");
964 if ((t = s->ob_sinterned) != NULL) {
965 if (t == (PyObject *)s)
966 return;
967 Py_INCREF(t);
968 *p = t;
969 Py_DECREF(s);
970 return;
971 }
972 if (interned == NULL) {
973 interned = PyDict_New();
974 if (interned == NULL)
975 return;
976 /* Force slow lookups: */
977 PyDict_SetItem(interned, Py_None, Py_None);
978 }
979 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
980 Py_INCREF(t);
981 *p = s->ob_sinterned = t;
982 Py_DECREF(s);
983 return;
984 }
985 t = (PyObject *)s;
986 if (PyDict_SetItem(interned, t, t) == 0) {
987 s->ob_sinterned = t;
988 return;
989 }
990 PyErr_Clear();
991}
992
993
994PyObject *
995PyString_InternFromString(cp)
996 const char *cp;
997{
998 PyObject *s = PyString_FromString(cp);
999 if (s == NULL)
1000 return NULL;
1001 PyString_InternInPlace(&s);
1002 return s;
1003}
1004
1005#endif