blob: 16884d369f0e048115d18a31d486205a4aaac504 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
2Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
3Netherlands.
4
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior permission.
14
15STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
16THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
18FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
20ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
21OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
23******************************************************************/
24
/* String object implementation */
26
Guido van Rossum3f5da241990-12-20 15:06:42 +000027#include "allobjects.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000028
29object *
30newsizedstringobject(str, size)
31 char *str;
32 int size;
33{
34 register stringobject *op = (stringobject *)
35 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000036 if (op == NULL)
37 return err_nomem();
38 NEWREF(op);
39 op->ob_type = &Stringtype;
40 op->ob_size = size;
41 if (str != NULL)
42 memcpy(op->ob_sval, str, size);
43 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000044 return (object *) op;
45}
46
47object *
48newstringobject(str)
49 char *str;
50{
51 register unsigned int size = strlen(str);
52 register stringobject *op = (stringobject *)
53 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000054 if (op == NULL)
55 return err_nomem();
56 NEWREF(op);
57 op->ob_type = &Stringtype;
58 op->ob_size = size;
59 strcpy(op->ob_sval, str);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000060 return (object *) op;
61}
62
63unsigned int
64getstringsize(op)
65 register object *op;
66{
67 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000069 return -1;
70 }
71 return ((stringobject *)op) -> ob_size;
72}
73
74/*const*/ char *
75getstringvalue(op)
76 register object *op;
77{
78 if (!is_stringobject(op)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 err_badcall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000080 return NULL;
81 }
82 return ((stringobject *)op) -> ob_sval;
83}
84
/* Methods */
86
Guido van Rossumbcaa31c1991-06-07 22:58:57 +000087static int
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088stringprint(op, fp, flags)
89 stringobject *op;
90 FILE *fp;
91 int flags;
92{
93 int i;
94 char c;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +000095 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000096 if (flags & PRINT_RAW) {
97 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +000098 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099 }
100 fprintf(fp, "'");
101 for (i = 0; i < op->ob_size; i++) {
102 c = op->ob_sval[i];
103 if (c == '\'' || c == '\\')
104 fprintf(fp, "\\%c", c);
105 else if (c < ' ' || c >= 0177)
106 fprintf(fp, "\\%03o", c&0377);
107 else
108 putc(c, fp);
109 }
110 fprintf(fp, "'");
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000111 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000112}
113
114static object *
115stringrepr(op)
116 register stringobject *op;
117{
118 /* XXX overflow? */
119 int newsize = 2 + 4 * op->ob_size * sizeof(char);
120 object *v = newsizedstringobject((char *)NULL, newsize);
121 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000122 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000123 }
124 else {
125 register int i;
126 register char c;
127 register char *p;
128 NEWREF(v);
129 v->ob_type = &Stringtype;
130 ((stringobject *)v)->ob_size = newsize;
131 p = ((stringobject *)v)->ob_sval;
132 *p++ = '\'';
133 for (i = 0; i < op->ob_size; i++) {
134 c = op->ob_sval[i];
135 if (c == '\'' || c == '\\')
136 *p++ = '\\', *p++ = c;
137 else if (c < ' ' || c >= 0177) {
138 sprintf(p, "\\%03o", c&0377);
139 while (*p != '\0')
140 p++;
141
142 }
143 else
144 *p++ = c;
145 }
146 *p++ = '\'';
147 *p = '\0';
148 resizestring(&v, (int) (p - ((stringobject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
153static int
154stringlength(a)
155 stringobject *a;
156{
157 return a->ob_size;
158}
159
160static object *
161stringconcat(a, bb)
162 register stringobject *a;
163 register object *bb;
164{
165 register unsigned int size;
166 register stringobject *op;
167 if (!is_stringobject(bb)) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000168 err_badarg();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169 return NULL;
170 }
171#define b ((stringobject *)bb)
172 /* Optimize cases with empty left or right operand */
173 if (a->ob_size == 0) {
174 INCREF(bb);
175 return bb;
176 }
177 if (b->ob_size == 0) {
178 INCREF(a);
179 return (object *)a;
180 }
181 size = a->ob_size + b->ob_size;
182 op = (stringobject *)
183 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000184 if (op == NULL)
185 return err_nomem();
186 NEWREF(op);
187 op->ob_type = &Stringtype;
188 op->ob_size = size;
189 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
190 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
191 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192 return (object *) op;
193#undef b
194}
195
196static object *
197stringrepeat(a, n)
198 register stringobject *a;
199 register int n;
200{
201 register int i;
202 register unsigned int size;
203 register stringobject *op;
204 if (n < 0)
205 n = 0;
206 size = a->ob_size * n;
207 if (size == a->ob_size) {
208 INCREF(a);
209 return (object *)a;
210 }
211 op = (stringobject *)
212 malloc(sizeof(stringobject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000213 if (op == NULL)
214 return err_nomem();
215 NEWREF(op);
216 op->ob_type = &Stringtype;
217 op->ob_size = size;
218 for (i = 0; i < size; i += a->ob_size)
219 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
220 op->ob_sval[size] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000221 return (object *) op;
222}
223
224/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
225
226static object *
227stringslice(a, i, j)
228 register stringobject *a;
229 register int i, j; /* May be negative! */
230{
231 if (i < 0)
232 i = 0;
233 if (j < 0)
234 j = 0; /* Avoid signed/unsigned bug in next line */
235 if (j > a->ob_size)
236 j = a->ob_size;
237 if (i == 0 && j == a->ob_size) { /* It's the same as a */
238 INCREF(a);
239 return (object *)a;
240 }
241 if (j < i)
242 j = i;
243 return newsizedstringobject(a->ob_sval + i, (int) (j-i));
244}
245
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000246#ifdef __STDC__
247#include <limits.h>
248#else
249#ifndef UCHAR_MAX
250#define UCHAR_MAX 255
251#endif
252#endif
253
254static object *characters[UCHAR_MAX + 1];
255
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256static object *
257stringitem(a, i)
258 stringobject *a;
259 register int i;
260{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000261 int c;
262 object *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000263 if (i < 0 || i >= a->ob_size) {
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000264 err_setstr(IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 return NULL;
266 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000267 c = a->ob_sval[i] & UCHAR_MAX;
268 v = characters[c];
269 if (v == NULL) {
270 v = newsizedstringobject((char *)NULL, 1);
271 if (v == NULL)
272 return NULL;
273 characters[c] = v;
274 ((stringobject *)v)->ob_sval[0] = c;
275 }
276 INCREF(v);
277 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278}
279
280static int
281stringcompare(a, b)
282 stringobject *a, *b;
283{
Guido van Rossum253919f1991-02-13 23:18:39 +0000284 int len_a = a->ob_size, len_b = b->ob_size;
285 int min_len = (len_a < len_b) ? len_a : len_b;
286 int cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
287 if (cmp != 0)
288 return cmp;
289 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290}
291
292static sequence_methods string_as_sequence = {
Guido van Rossumf380e661991-06-04 19:36:32 +0000293 stringlength, /*sq_length*/
294 stringconcat, /*sq_concat*/
295 stringrepeat, /*sq_repeat*/
296 stringitem, /*sq_item*/
297 stringslice, /*sq_slice*/
298 0, /*sq_ass_item*/
299 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300};
301
302typeobject Stringtype = {
303 OB_HEAD_INIT(&Typetype)
304 0,
305 "string",
306 sizeof(stringobject),
307 sizeof(char),
308 free, /*tp_dealloc*/
309 stringprint, /*tp_print*/
310 0, /*tp_getattr*/
311 0, /*tp_setattr*/
312 stringcompare, /*tp_compare*/
313 stringrepr, /*tp_repr*/
314 0, /*tp_as_number*/
315 &string_as_sequence, /*tp_as_sequence*/
316 0, /*tp_as_mapping*/
317};
318
319void
320joinstring(pv, w)
321 register object **pv;
322 register object *w;
323{
324 register object *v;
325 if (*pv == NULL || w == NULL || !is_stringobject(*pv))
326 return;
327 v = stringconcat((stringobject *) *pv, w);
328 DECREF(*pv);
329 *pv = v;
330}
331
332/* The following function breaks the notion that strings are immutable:
333 it changes the size of a string. We get away with this only if there
334 is only one module referencing the object. You can also think of it
335 as creating a new string object and destroying the old one, only
336 more efficiently. In any case, don't use this if the string may
337 already be known to some other part of the code... */
338
339int
340resizestring(pv, newsize)
341 object **pv;
342 int newsize;
343{
Guido van Rossum921842f1990-11-18 17:30:23 +0000344 register object *v;
345 register stringobject *sv;
346 v = *pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000347 if (!is_stringobject(v) || v->ob_refcnt != 1) {
348 *pv = 0;
349 DECREF(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000350 err_badcall();
351 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000353 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum392ab321990-11-18 17:41:19 +0000354#ifdef REF_DEBUG
Guido van Rossum921842f1990-11-18 17:30:23 +0000355 --ref_total;
356#endif
357 UNREF(v);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 *pv = (object *)
359 realloc((char *)v,
360 sizeof(stringobject) + newsize * sizeof(char));
361 if (*pv == NULL) {
Guido van Rossum921842f1990-11-18 17:30:23 +0000362 DEL(v);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000363 err_nomem();
364 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000365 }
Guido van Rossum921842f1990-11-18 17:30:23 +0000366 NEWREF(*pv);
367 sv = (stringobject *) *pv;
368 sv->ob_size = newsize;
369 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000370 return 0;
371}