
/* String object interface */

#ifndef Py_STRINGOBJECT_H
#define Py_STRINGOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif

#include <stdarg.h>

/*
Type PyStringObject represents a character string. An extra zero byte is
reserved at the end to ensure it is zero-terminated, but a size is
present so strings with null bytes in them can be represented. This
is an immutable object type.

There are functions to create new string objects, to test
an object for string-ness, and to get the
string value. The latter function returns a null pointer
if the object is not of the proper type.
There is a variant that takes an explicit size as well as a
variant that assumes a zero-terminated string. Note that none of the
functions should be applied to NULL object pointers.
*/

/* Caching the hash (ob_shash) saves recalculation of a string's hash value.
   Interning strings (ob_sstate) tries to ensure that only one string
   object with a given value exists, so equality tests can be one pointer
   comparison. This is generally restricted to strings that "look like"
   Python identifiers, although the intern() builtin can be used to force
   interning of any string.
   Together, these sped the interpreter by up to 20%. */

Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035typedef struct {
Fred Drake3cf4d2b2000-07-09 00:55:06 +000036 PyObject_VAR_HEAD
Fred Drake3cf4d2b2000-07-09 00:55:06 +000037 long ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000038 int ob_sstate;
Fred Drake3cf4d2b2000-07-09 00:55:06 +000039 char ob_sval[1];
Armin Rigo89a39462004-10-28 16:32:00 +000040
41 /* Invariants:
42 * ob_sval contains space for 'ob_size+1' elements.
43 * ob_sval[ob_size] == 0.
44 * ob_shash is the hash of the string or -1 if not computed yet.
45 * ob_sstate != 0 iff the string object is in stringobject.c's
46 * 'interned' dictionary; in this case the two references
47 * from 'interned' to this object are *not counted* in ob_refcnt.
48 */
Guido van Rossumcaa63801995-01-12 11:45:45 +000049} PyStringObject;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000050
/* Interning states (presumably the values stored in
   PyStringObject.ob_sstate).  ob_sstate is nonzero iff the string is
   interned, so SSTATE_NOT_INTERNED must remain 0.
   NOTE(review): the mortal/immortal distinction is implemented outside
   this header. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2

Mark Hammond91a681d2002-08-12 07:21:58 +000055PyAPI_DATA(PyTypeObject) PyBaseString_Type;
56PyAPI_DATA(PyTypeObject) PyString_Type;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057
/* PyString_Check(op) evaluates PyType_FastSubclass() (defined elsewhere) on
   op's type with the Py_TPFLAGS_STRING_SUBCLASS flag.
   PyString_CheckExact(op) is true only when op's type is exactly
   PyString_Type.
   Both macros dereference op, so op must not be NULL. */
#define PyString_Check(op) \
    PyType_FastSubclass((op)->ob_type, Py_TPFLAGS_STRING_SUBCLASS)
#define PyString_CheckExact(op) ((op)->ob_type == &PyString_Type)

Martin v. Löwis18e16552006-02-15 17:27:45 +000062PyAPI_FUNC(PyObject *) PyString_FromStringAndSize(const char *, Py_ssize_t);
Mark Hammond91a681d2002-08-12 07:21:58 +000063PyAPI_FUNC(PyObject *) PyString_FromString(const char *);
64PyAPI_FUNC(PyObject *) PyString_FromFormatV(const char*, va_list)
Neil Schemenauer96aa0ac2002-09-15 14:09:54 +000065 Py_GCC_ATTRIBUTE((format(printf, 1, 0)));
Mark Hammond91a681d2002-08-12 07:21:58 +000066PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...)
Neil Schemenauer96aa0ac2002-09-15 14:09:54 +000067 Py_GCC_ATTRIBUTE((format(printf, 1, 2)));
Martin v. Löwis18e16552006-02-15 17:27:45 +000068PyAPI_FUNC(Py_ssize_t) PyString_Size(PyObject *);
Mark Hammond91a681d2002-08-12 07:21:58 +000069PyAPI_FUNC(char *) PyString_AsString(PyObject *);
Martin v. Löwis8a8da792002-08-14 07:46:28 +000070PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
Mark Hammond91a681d2002-08-12 07:21:58 +000071PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
72PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
Martin v. Löwis18e16552006-02-15 17:27:45 +000073PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
Mark Hammond91a681d2002-08-12 07:21:58 +000074PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
75PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
76PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
Tim Peters38fd5b62000-09-21 05:43:11 +000077 int, char**, int*);
Martin v. Löwis18e16552006-02-15 17:27:45 +000078PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, Py_ssize_t,
79 const char *, Py_ssize_t,
Martin v. Löwis8a8da792002-08-14 07:46:28 +000080 const char *);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000081
Mark Hammond91a681d2002-08-12 07:21:58 +000082PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083PyAPI_FUNC(void) PyString_InternImmortal(PyObject **);
Mark Hammond91a681d2002-08-12 07:21:58 +000084PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
85PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
Guido van Rossum1e6e9a21997-01-18 07:53:23 +000086
/* Use only if you know it's a string: op is cast to PyStringObject *
   without any type check.  Yields the ob_sstate field (nonzero iff the
   string is interned). */
#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)

/* Macros, trading safety for speed: op is cast to PyStringObject * without
   any type or NULL check.  PyString_AS_STRING yields the ob_sval character
   buffer and PyString_GET_SIZE yields ob_size. */
#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
#define PyString_GET_SIZE(op)  (((PyStringObject *)(op))->ob_size)

Tim Petersa7259592001-06-16 05:11:17 +000094/* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*,
95 x must be an iterable object. */
Mark Hammond91a681d2002-08-12 07:21:58 +000096PyAPI_FUNC(PyObject *) _PyString_Join(PyObject *sep, PyObject *x);
Tim Petersa7259592001-06-16 05:11:17 +000097
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +000098/* --- Generic Codecs ----------------------------------------------------- */
99
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000100/* Create an object by decoding the encoded string s of the
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000101 given size. */
102
Mark Hammond91a681d2002-08-12 07:21:58 +0000103PyAPI_FUNC(PyObject*) PyString_Decode(
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000104 const char *s, /* encoded string */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000105 Py_ssize_t size, /* size of buffer */
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000106 const char *encoding, /* encoding */
107 const char *errors /* error handling */
108 );
109
110/* Encodes a char buffer of the given size and returns a
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000111 Python object. */
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000112
Mark Hammond91a681d2002-08-12 07:21:58 +0000113PyAPI_FUNC(PyObject*) PyString_Encode(
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000114 const char *s, /* string char buffer */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000115 Py_ssize_t size, /* number of chars to encode */
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000116 const char *encoding, /* encoding */
117 const char *errors /* error handling */
118 );
119
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000120/* Encodes a string object and returns the result as Python
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000121 object. */
122
Mark Hammond91a681d2002-08-12 07:21:58 +0000123PyAPI_FUNC(PyObject*) PyString_AsEncodedObject(
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000124 PyObject *str, /* string object */
125 const char *encoding, /* encoding */
126 const char *errors /* error handling */
127 );
128
129/* Encodes a string object and returns the result as Python string
130 object.
131
132 If the codec returns an Unicode object, the object is converted
133 back to a string using the default encoding.
134
135 DEPRECATED - use PyString_AsEncodedObject() instead. */
136
Mark Hammond91a681d2002-08-12 07:21:58 +0000137PyAPI_FUNC(PyObject*) PyString_AsEncodedString(
Marc-André Lemburg3d1a1d72000-07-06 11:25:40 +0000138 PyObject *str, /* string object */
139 const char *encoding, /* encoding */
140 const char *errors /* error handling */
141 );
142
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000143/* Decodes a string object and returns the result as Python
144 object. */
145
Mark Hammond91a681d2002-08-12 07:21:58 +0000146PyAPI_FUNC(PyObject*) PyString_AsDecodedObject(
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000147 PyObject *str, /* string object */
148 const char *encoding, /* encoding */
149 const char *errors /* error handling */
150 );
151
152/* Decodes a string object and returns the result as Python string
153 object.
154
155 If the codec returns an Unicode object, the object is converted
156 back to a string using the default encoding.
157
158 DEPRECATED - use PyString_AsDecodedObject() instead. */
159
Mark Hammond91a681d2002-08-12 07:21:58 +0000160PyAPI_FUNC(PyObject*) PyString_AsDecodedString(
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000161 PyObject *str, /* string object */
162 const char *encoding, /* encoding */
163 const char *errors /* error handling */
164 );
165
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000166/* Provides access to the internal data buffer and size of a string
167 object or the default encoded version of an Unicode object. Passing
168 NULL as *len parameter will force the string buffer to be
169 0-terminated (passing a string with embedded NULL characters will
170 cause an exception). */
171
Mark Hammond91a681d2002-08-12 07:21:58 +0000172PyAPI_FUNC(int) PyString_AsStringAndSize(
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000173 register PyObject *obj, /* string or Unicode object */
174 register char **s, /* pointer to buffer variable */
Martin v. Löwis18e16552006-02-15 17:27:45 +0000175 register Py_ssize_t *len /* pointer to length variable or NULL
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000176 (only possible for 0-terminated
177 strings) */
178 );
179
180
#ifdef __cplusplus
}
#endif
#endif /* !Py_STRINGOBJECT_H */