blob: 9bc18e2444d177ea4e0754a3a25dae3fc76ea36a [file] [log] [blame]
1\chapter{Defining New Object Types \label{newTypes}}
2
3
4\section{Allocating Objects on the Heap
5 \label{allocating-objects}}
6
7\begin{cfuncdesc}{PyObject*}{_PyObject_New}{PyTypeObject *type}
8\end{cfuncdesc}
9
10\begin{cfuncdesc}{PyVarObject*}{_PyObject_NewVar}{PyTypeObject *type, int size}
11\end{cfuncdesc}
12
13\begin{cfuncdesc}{void}{_PyObject_Del}{PyObject *op}
14\end{cfuncdesc}
15
16\begin{cfuncdesc}{PyObject*}{PyObject_Init}{PyObject *op,
17 PyTypeObject *type}
18 Initialize a newly-allocated object \var{op} with its type and
19 initial reference. Returns the initialized object. If \var{type}
20 indicates that the object participates in the cyclic garbage
21 detector, it is added to the detector's set of observed objects.
22 Other fields of the object are not affected.
23\end{cfuncdesc}
24
25\begin{cfuncdesc}{PyVarObject*}{PyObject_InitVar}{PyVarObject *op,
26 PyTypeObject *type, int size}
27 This does everything \cfunction{PyObject_Init()} does, and also
28 initializes the length information for a variable-size object.
29\end{cfuncdesc}
30
31\begin{cfuncdesc}{\var{TYPE}*}{PyObject_New}{TYPE, PyTypeObject *type}
32 Allocate a new Python object using the C structure type \var{TYPE}
33 and the Python type object \var{type}. Fields not defined by the
34 Python object header are not initialized; the object's reference
35 count will be one. The size of the memory
36 allocation is determined from the \member{tp_basicsize} field of the
37 type object.
38\end{cfuncdesc}
39
40\begin{cfuncdesc}{\var{TYPE}*}{PyObject_NewVar}{TYPE, PyTypeObject *type,
41 int size}
42 Allocate a new Python object using the C structure type \var{TYPE}
43 and the Python type object \var{type}. Fields not defined by the
44 Python object header are not initialized. The allocated memory
45 allows for the \var{TYPE} structure plus \var{size} fields of the
46 size given by the \member{tp_itemsize} field of \var{type}. This is
47 useful for implementing objects like tuples, which are able to
48 determine their size at construction time. Embedding the array of
49 fields into the same allocation decreases the number of allocations,
50 improving the memory management efficiency.
51\end{cfuncdesc}
52
53\begin{cfuncdesc}{void}{PyObject_Del}{PyObject *op}
54 Releases memory allocated to an object using
55 \cfunction{PyObject_New()} or \cfunction{PyObject_NewVar()}. This
56 is normally called from the \member{tp_dealloc} handler specified in
57 the object's type. The fields of the object should not be accessed
58 after this call as the memory is no longer a valid Python object.
59\end{cfuncdesc}
60
61\begin{cfuncdesc}{\var{TYPE}*}{PyObject_NEW}{TYPE, PyTypeObject *type}
62 Macro version of \cfunction{PyObject_New()}, to gain performance at
63 the expense of safety. This does not check \var{type} for a \NULL{}
64 value.
65\end{cfuncdesc}
66
67\begin{cfuncdesc}{\var{TYPE}*}{PyObject_NEW_VAR}{TYPE, PyTypeObject *type,
68 int size}
69 Macro version of \cfunction{PyObject_NewVar()}, to gain performance
70 at the expense of safety. This does not check \var{type} for a
71 \NULL{} value.
72\end{cfuncdesc}
73
74\begin{cfuncdesc}{void}{PyObject_DEL}{PyObject *op}
75 Macro version of \cfunction{PyObject_Del()}.
76\end{cfuncdesc}
77
78\begin{cfuncdesc}{PyObject*}{Py_InitModule}{char *name,
79 PyMethodDef *methods}
80 Create a new module object based on a name and table of functions,
81 returning the new module object.
82\end{cfuncdesc}
83
84\begin{cfuncdesc}{PyObject*}{Py_InitModule3}{char *name,
85 PyMethodDef *methods,
86 char *doc}
87 Create a new module object based on a name and table of functions,
88 returning the new module object. If \var{doc} is non-\NULL, it will
89 be used to define the docstring for the module.
90\end{cfuncdesc}
91
92\begin{cfuncdesc}{PyObject*}{Py_InitModule4}{char *name,
93 PyMethodDef *methods,
94 char *doc, PyObject *self,
95 int apiver}
96 Create a new module object based on a name and table of functions,
97 returning the new module object. If \var{doc} is non-\NULL, it will
98 be used to define the docstring for the module. If \var{self} is
99 non-\NULL, it will be passed to the functions of the module as their
100 (otherwise \NULL) first parameter. (This was added as an
101 experimental feature, and there are no known uses in the current
102 version of Python.) For \var{apiver}, the only value which should
103 be passed is defined by the constant \constant{PYTHON_API_VERSION}.
104
105 \note{Most uses of this function should probably be using
106 \cfunction{Py_InitModule3()} instead; only use this if you are
107 sure you need it.}
108\end{cfuncdesc}
109
110DL_IMPORT
111
112\begin{cvardesc}{PyObject}{_Py_NoneStruct}
113 Object which is visible in Python as \code{None}. This should only
114 be accessed using the \code{Py_None} macro, which evaluates to a
115 pointer to this object.
116\end{cvardesc}
117
118
119\section{Common Object Structures \label{common-structs}}
120
121PyObject, PyVarObject
122
123PyObject_HEAD, PyObject_HEAD_INIT, PyObject_VAR_HEAD
124
125Typedefs:
126unaryfunc, binaryfunc, ternaryfunc, inquiry, coercion, intargfunc,
127intintargfunc, intobjargproc, intintobjargproc, objobjargproc,
128destructor, printfunc, getattrfunc, getattrofunc, setattrfunc,
129setattrofunc, cmpfunc, reprfunc, hashfunc
130
131\begin{ctypedesc}{PyCFunction}
132 Type of the functions used to implement most Python callables in C.
133\end{ctypedesc}
134
135\begin{ctypedesc}{PyMethodDef}
136 Structure used to describe a method of an extension type. This
137 structure has four fields:
138
139 \begin{tableiii}{l|l|l}{member}{Field}{C Type}{Meaning}
140 \lineiii{ml_name}{char *}{name of the method}
141 \lineiii{ml_meth}{PyCFunction}{pointer to the C implementation}
142 \lineiii{ml_flags}{int}{flag bits indicating how the call should be
143 constructed}
144 \lineiii{ml_doc}{char *}{points to the contents of the docstring}
145 \end{tableiii}
146\end{ctypedesc}
147
148The \member{ml_meth} is a C function pointer. The functions may be of
149different types, but they always return \ctype{PyObject*}. If the
150function is not of the \ctype{PyCFunction} type, the compiler will require
151a cast in the method table. Even though \ctype{PyCFunction} defines
152the first parameter as \ctype{PyObject*}, it is common that the method
153implementation uses the specific C type of the \var{self} object.
154
155The flags can have the following values. Only \constant{METH_VARARGS}
156and \constant{METH_KEYWORDS} can be combined; the others can't.
157
158\begin{datadesc}{METH_VARARGS}
159 This is the typical calling convention, where the methods have the
160 type \ctype{PyCFunction}. The function expects two
161 \ctype{PyObject*} values. The first one is the \var{self} object for
162 methods; for module functions, it has the value given to
163 \cfunction{Py_InitModule4()} (or \NULL{} if
164 \cfunction{Py_InitModule()} was used). The second parameter
165 (often called \var{args}) is a tuple object representing all
166 arguments. This parameter is typically processed using
167 \cfunction{PyArg_ParseTuple()}.
168\end{datadesc}
169
170\begin{datadesc}{METH_KEYWORDS}
171 Methods with these flags must be of type
172 \ctype{PyCFunctionWithKeywords}. The function expects three
173 parameters: \var{self}, \var{args}, and a dictionary of all the
174 keyword arguments. The flag is typically combined with
175 \constant{METH_VARARGS}, and the parameters are typically processed
176 using \cfunction{PyArg_ParseTupleAndKeywords()}.
177\end{datadesc}
178
179\begin{datadesc}{METH_NOARGS}
180 Methods without parameters don't need to check whether arguments are
181 given if they are listed with the \constant{METH_NOARGS} flag. They
182 need to be of type \ctype{PyNoArgsFunction}: they expect a single
183 \ctype{PyObject*} as a parameter. When used with object
184 methods, this parameter is typically named \code{self} and will hold
185 a reference to the object instance.
186\end{datadesc}
187
188\begin{datadesc}{METH_O}
189 Methods with a single object argument can be listed with the
190 \constant{METH_O} flag, instead of invoking
191 \cfunction{PyArg_ParseTuple()} with a \code{"O"} argument. They have
192 the type \ctype{PyCFunction}, with the \var{self} parameter, and a
193 \ctype{PyObject*} parameter representing the single argument.
194\end{datadesc}
195
196\begin{datadesc}{METH_OLDARGS}
197 This calling convention is deprecated. The method must be of type
198 \ctype{PyCFunction}. The second argument is \NULL{} if no arguments
199 are given, a single object if exactly one argument is given, and a
200 tuple of objects if more than one argument is given. There is no
201 way for a function using this convention to distinguish between a
202 call with multiple arguments and a call with a tuple as the only
203 argument.
204\end{datadesc}
205
206\begin{cfuncdesc}{PyObject*}{Py_FindMethod}{PyMethodDef table[],
207 PyObject *ob, char *name}
208 Return a bound method object for an extension type implemented in
209 C. This function also handles the special attribute
210 \member{__methods__}, returning a list of all the method names
211 defined in \var{table}.
212\end{cfuncdesc}
213
214
215\section{Mapping Object Structures \label{mapping-structs}}
216
217\begin{ctypedesc}{PyMappingMethods}
218 Structure used to hold pointers to the functions used to implement
219 the mapping protocol for an extension type.
220\end{ctypedesc}
221
222
223\section{Number Object Structures \label{number-structs}}
224
225\begin{ctypedesc}{PyNumberMethods}
226 Structure used to hold pointers to the functions an extension type
227 uses to implement the number protocol.
228\end{ctypedesc}
229
230
231\section{Sequence Object Structures \label{sequence-structs}}
232
233\begin{ctypedesc}{PySequenceMethods}
234 Structure used to hold pointers to the functions which an object
235 uses to implement the sequence protocol.
236\end{ctypedesc}
237
238
239\section{Buffer Object Structures \label{buffer-structs}}
240\sectionauthor{Greg J. Stein}{greg@lyra.org}
241
242The buffer interface exports a model where an object can expose its
243internal data as a set of chunks of data, where each chunk is
244specified as a pointer/length pair. These chunks are called
245\dfn{segments} and are presumed to be non-contiguous in memory.
246
247If an object does not export the buffer interface, then its
248\member{tp_as_buffer} member in the \ctype{PyTypeObject} structure
249should be \NULL. Otherwise, the \member{tp_as_buffer} will point to
250a \ctype{PyBufferProcs} structure.
251
252\note{It is very important that your \ctype{PyTypeObject} structure
253uses \constant{Py_TPFLAGS_DEFAULT} for the value of the
254\member{tp_flags} member rather than \code{0}. This tells the Python
255runtime that your \ctype{PyBufferProcs} structure contains the
256\member{bf_getcharbuffer} slot. Older versions of Python did not have
257this member, so a new Python interpreter using an old extension needs
258to be able to test for its presence before using it.}
259
260\begin{ctypedesc}{PyBufferProcs}
261 Structure used to hold the function pointers which define an
262 implementation of the buffer protocol.
263
264 The first slot is \member{bf_getreadbuffer}, of type
265 \ctype{getreadbufferproc}. If this slot is \NULL, then the object
266 does not support reading from the internal data. This is
267 non-sensical, so implementors should fill this in, but callers
268 should test that the slot contains a non-\NULL{} value.
269
270 The next slot is \member{bf_getwritebuffer} having type
271 \ctype{getwritebufferproc}. This slot may be \NULL{} if the object
272 does not allow writing into its returned buffers.
273
274 The third slot is \member{bf_getsegcount}, with type
275 \ctype{getsegcountproc}. This slot must not be \NULL{} and is used
276 to inform the caller how many segments the object contains. Simple
277 objects such as \ctype{PyString_Type} and \ctype{PyBuffer_Type}
278 objects contain a single segment.
279
280 The last slot is \member{bf_getcharbuffer}, of type
281 \ctype{getcharbufferproc}. This slot will only be present if the
282 \constant{Py_TPFLAGS_HAVE_GETCHARBUFFER} flag is present in the
283 \member{tp_flags} field of the object's \ctype{PyTypeObject}.
284 Before using this slot, the caller should test whether it is present
285 by using the
286 \cfunction{PyType_HasFeature()}\ttindex{PyType_HasFeature()}
287 function. If present, it may be \NULL, indicating that the object's
288 contents cannot be used as \emph{8-bit characters}.
289 The slot function may also raise an error if the object's contents
290 cannot be interpreted as 8-bit characters. For example, if the
291 object is an array which is configured to hold floating point
292 values, an exception may be raised if a caller attempts to use
293 \member{bf_getcharbuffer} to fetch a sequence of 8-bit characters.
294 This notion of exporting the internal buffers as ``text'' is used to
295 distinguish between objects that are binary in nature, and those
296 which have character-based content.
297
298 \note{The current policy seems to state that these characters
299 may be multi-byte characters. This implies that a buffer size of
300 \var{N} does not mean there are \var{N} characters present.}
301\end{ctypedesc}
302
303\begin{datadesc}{Py_TPFLAGS_HAVE_GETCHARBUFFER}
304 Flag bit set in the type structure to indicate that the
305 \member{bf_getcharbuffer} slot is known. This being set does not
306 indicate that the object supports the buffer interface or that the
307 \member{bf_getcharbuffer} slot is non-\NULL.
308\end{datadesc}
309
310\begin{ctypedesc}[getreadbufferproc]{int (*getreadbufferproc)
311 (PyObject *self, int segment, void **ptrptr)}
312 Return a pointer to a readable segment of the buffer. This function
313 is allowed to raise an exception, in which case it must return
314 \code{-1}. The \var{segment} which is passed must be zero or
315 positive, and strictly less than the number of segments returned by
316 the \member{bf_getsegcount} slot function. On success, it returns
317 the length of the buffer memory, and sets \code{*\var{ptrptr}} to a
318 pointer to that memory.
319\end{ctypedesc}
320
321\begin{ctypedesc}[getwritebufferproc]{int (*getwritebufferproc)
322 (PyObject *self, int segment, void **ptrptr)}
323 Return a pointer to a writable memory buffer in
324 \code{*\var{ptrptr}}, and the length of that segment as the function
325 return value. The memory buffer must correspond to buffer segment
326 \var{segment}. Must return \code{-1} and set an exception on
327 error. \exception{TypeError} should be raised if the object only
328 supports read-only buffers, and \exception{SystemError} should be
329 raised when \var{segment} specifies a segment that doesn't exist.
330% Why doesn't it raise ValueError for this one?
331% GJS: because you shouldn't be calling it with an invalid
332% segment. That indicates a blatant programming error in the C
333% code.
334\end{ctypedesc}
335
336\begin{ctypedesc}[getsegcountproc]{int (*getsegcountproc)
337 (PyObject *self, int *lenp)}
338 Return the number of memory segments which comprise the buffer. If
339 \var{lenp} is not \NULL, the implementation must report the sum of
340 the sizes (in bytes) of all segments in \code{*\var{lenp}}.
341 The function cannot fail.
342\end{ctypedesc}
343
344\begin{ctypedesc}[getcharbufferproc]{int (*getcharbufferproc)
345 (PyObject *self, int segment, const char **ptrptr)}
346\end{ctypedesc}
347
348
349\section{Supporting the Iterator Protocol
350 \label{supporting-iteration}}
351
352
353\section{Supporting Cyclic Garbage Collection
354 \label{supporting-cycle-detection}}
355
356Python's support for detecting and collecting garbage which involves
357circular references requires support from object types which are
358``containers'' for other objects which may also be containers. Types
359which do not store references to other objects, or which only store
360references to atomic types (such as numbers or strings), do not need
361to provide any explicit support for garbage collection.
362
363To create a container type, the \member{tp_flags} field of the type
364object must include the \constant{Py_TPFLAGS_HAVE_GC} flag, and the type
365must provide an implementation of the \member{tp_traverse} handler. If instances of the
366type are mutable, a \member{tp_clear} implementation must also be
367provided.
368
369\begin{datadesc}{Py_TPFLAGS_HAVE_GC}
370 Objects with a type with this flag set must conform with the rules
371 documented here. For convenience these objects will be referred to
372 as container objects.
373\end{datadesc}
374
375Constructors for container types must conform to two rules:
376
377\begin{enumerate}
378\item The memory for the object must be allocated using
379 \cfunction{PyObject_GC_New()} or \cfunction{PyObject_GC_NewVar()}.
380
381\item Once all the fields which may contain references to other
382 containers are initialized, it must call
383 \cfunction{PyObject_GC_Track()}.
384\end{enumerate}
385
386\begin{cfuncdesc}{\var{TYPE}*}{PyObject_GC_New}{TYPE, PyTypeObject *type}
387 Analogous to \cfunction{PyObject_New()} but for container objects with
388 the \constant{Py_TPFLAGS_HAVE_GC} flag set.
389\end{cfuncdesc}
390
391\begin{cfuncdesc}{\var{TYPE}*}{PyObject_GC_NewVar}{TYPE, PyTypeObject *type,
392 int size}
393 Analogous to \cfunction{PyObject_NewVar()} but for container objects
394 with the \constant{Py_TPFLAGS_HAVE_GC} flag set.
395\end{cfuncdesc}
396
397\begin{cfuncdesc}{PyVarObject *}{PyObject_GC_Resize}{PyVarObject *op, int}
398 Resize an object allocated by \cfunction{PyObject_NewVar()}. Returns
399 the resized object or \NULL{} on failure.
400\end{cfuncdesc}
401
402\begin{cfuncdesc}{void}{PyObject_GC_Track}{PyObject *op}
403 Adds the object \var{op} to the set of container objects tracked by
404 the collector. The collector can run at unexpected times so objects
405 must be valid while being tracked. This should be called once all
406 the fields followed by the \member{tp_traverse} handler become valid,
407 usually near the end of the constructor.
408\end{cfuncdesc}
409
410\begin{cfuncdesc}{void}{_PyObject_GC_TRACK}{PyObject *op}
411 A macro version of \cfunction{PyObject_GC_Track()}. It should not be
412 used for extension modules.
413\end{cfuncdesc}
414
415Similarly, the deallocator for the object must conform to a similar
416pair of rules:
417
418\begin{enumerate}
419\item Before fields which refer to other containers are invalidated,
420 \cfunction{PyObject_GC_UnTrack()} must be called.
421
422\item The object's memory must be deallocated using
423 \cfunction{PyObject_GC_Del()}.
424\end{enumerate}
425
426\begin{cfuncdesc}{void}{PyObject_GC_Del}{PyObject *op}
427 Releases memory allocated to an object using
428 \cfunction{PyObject_GC_New()} or \cfunction{PyObject_GC_NewVar()}.
429\end{cfuncdesc}
430
431\begin{cfuncdesc}{void}{PyObject_GC_UnTrack}{PyObject *op}
432 Remove the object \var{op} from the set of container objects tracked
433 by the collector. Note that \cfunction{PyObject_GC_Track()} can be
434 called again on this object to add it back to the set of tracked
435 objects. The deallocator (\member{tp_dealloc} handler) should call
436 this for the object before any of the fields used by the
437 \member{tp_traverse} handler become invalid.
438\end{cfuncdesc}
439
440\begin{cfuncdesc}{void}{_PyObject_GC_UNTRACK}{PyObject *op}
441 A macro version of \cfunction{PyObject_GC_UnTrack()}. It should not be
442 used for extension modules.
443\end{cfuncdesc}
444
445The \member{tp_traverse} handler accepts a function parameter of this
446type:
447
448\begin{ctypedesc}[visitproc]{int (*visitproc)(PyObject *object, void *arg)}
449 Type of the visitor function passed to the \member{tp_traverse}
450 handler. The function should be called with an object to traverse
451 as \var{object} and the third parameter to the \member{tp_traverse}
452 handler as \var{arg}.
453\end{ctypedesc}
454
455The \member{tp_traverse} handler must have the following type:
456
457\begin{ctypedesc}[traverseproc]{int (*traverseproc)(PyObject *self,
458 visitproc visit, void *arg)}
459 Traversal function for a container object. Implementations must
460 call the \var{visit} function for each object directly contained by
461 \var{self}, with the parameters to \var{visit} being the contained
462 object and the \var{arg} value passed to the handler. If
463 \var{visit} returns a non-zero value then an error has occurred and
464 that value should be returned immediately.
465\end{ctypedesc}
466
467The \member{tp_clear} handler must be of the \ctype{inquiry} type, or
468\NULL{} if the object is immutable.
469
470\begin{ctypedesc}[inquiry]{int (*inquiry)(PyObject *self)}
471 Drop references that may have created reference cycles. Immutable
472 objects do not have to define this method since they can never
473 directly create reference cycles. Note that the object must still
474 be valid after calling this method (don't just call
475 \cfunction{Py_DECREF()} on a reference). The collector will call
476 this method if it detects that this object is involved in a
477 reference cycle.
478\end{ctypedesc}
479
480
481\subsection{Example Cycle Collector Support
482 \label{example-cycle-support}}
483
484This example shows only enough of the implementation of an extension
485type to show how the garbage collector support needs to be added. It
486shows the definition of the object structure, the
487\member{tp_traverse}, \member{tp_clear} and \member{tp_dealloc}
488implementations, the type structure, and a constructor --- the module
489initialization needed to export the constructor to Python is not shown
490as there are no special considerations there for the collector. To
491make this interesting, assume that the module exposes ways for the
492\member{container} field of the object to be modified. Note that
493since no checks are made on the type of the object used to initialize
494\member{container}, we have to assume that it may be a container.
495
496\begin{verbatim}
497#include "Python.h"
498
499typedef struct {
500 PyObject_HEAD
501 PyObject *container;
502} MyObject;
503
504static int
505my_traverse(MyObject *self, visitproc visit, void *arg)
506{
507 if (self->container != NULL)
508 return visit(self->container, arg);
509 else
510 return 0;
511}
512
513static int
514my_clear(MyObject *self)
515{
516 Py_XDECREF(self->container);
517 self->container = NULL;
518
519 return 0;
520}
521
522static void
523my_dealloc(MyObject *self)
524{
525 PyObject_GC_UnTrack((PyObject *) self);
526 Py_XDECREF(self->container);
527 PyObject_GC_Del(self);
528}
529\end{verbatim}
530
531\begin{verbatim}
532statichere PyTypeObject
533MyObject_Type = {
534 PyObject_HEAD_INIT(NULL)
535 0,
536 "MyObject",
537 sizeof(MyObject),
538 0,
539 (destructor)my_dealloc, /* tp_dealloc */
540 0, /* tp_print */
541 0, /* tp_getattr */
542 0, /* tp_setattr */
543 0, /* tp_compare */
544 0, /* tp_repr */
545 0, /* tp_as_number */
546 0, /* tp_as_sequence */
547 0, /* tp_as_mapping */
548 0, /* tp_hash */
549 0, /* tp_call */
550 0, /* tp_str */
551 0, /* tp_getattro */
552 0, /* tp_setattro */
553 0, /* tp_as_buffer */
554 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
555 0, /* tp_doc */
556 (traverseproc)my_traverse, /* tp_traverse */
557 (inquiry)my_clear, /* tp_clear */
558 0, /* tp_richcompare */
559 0, /* tp_weaklistoffset */
560};
561
562/* This constructor should be made accessible from Python. */
563static PyObject *
564new_object(PyObject *unused, PyObject *args)
565{
566 PyObject *container = NULL;
567 MyObject *result = NULL;
568
569 if (PyArg_ParseTuple(args, "|O:new_object", &container)) {
570 result = PyObject_GC_New(MyObject, &MyObject_Type);
571 if (result != NULL) {
572 result->container = container;
573 PyObject_GC_Track(result);
574 }
575 }
576 return (PyObject *) result;
577}
578\end{verbatim}