Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 1 | \chapter{Utilities \label{utilities}} |
| 2 | |
| 3 | The functions in this chapter perform various utility tasks, ranging |
| 4 | from helping C code be more portable across platforms, to using Python |
| 5 | modules from C, to parsing function arguments and constructing Python |
| 6 | values from C values. |
| 7 | |
| 8 | |
| 9 | \section{Operating System Utilities \label{os}} |
| 10 | |
| 11 | \begin{cfuncdesc}{int}{Py_FdIsInteractive}{FILE *fp, char *filename} |
| 12 | Return true (nonzero) if the standard I/O file \var{fp} with name |
| 13 | \var{filename} is deemed interactive. This is the case for files |
| 14 | for which \samp{isatty(fileno(\var{fp}))} is true. If the global |
| 15 | flag \cdata{Py_InteractiveFlag} is true, this function also returns |
| 16 | true if the \var{filename} pointer is \NULL{} or if the name is |
| 17 | equal to one of the strings \code{'<stdin>'} or \code{'???'}. |
| 18 | \end{cfuncdesc} |
| 19 | |
| 20 | \begin{cfuncdesc}{long}{PyOS_GetLastModificationTime}{char *filename} |
| 21 | Return the time of last modification of the file \var{filename}. |
| 22 | The result is encoded in the same way as the timestamp returned by |
| 23 | the standard C library function \cfunction{time()}. |
| 24 | \end{cfuncdesc} |
| 25 | |
| 26 | \begin{cfuncdesc}{void}{PyOS_AfterFork}{} |
| 27 | Function to update some internal state after a process fork; this |
| 28 | should be called in the new process if the Python interpreter will |
| 29 | continue to be used. If a new executable is loaded into the new |
| 30 | process, this function does not need to be called. |
| 31 | \end{cfuncdesc} |
| 32 | |
| 33 | \begin{cfuncdesc}{int}{PyOS_CheckStack}{} |
| 34 | Return true when the interpreter runs out of stack space. This is a |
| 35 | reliable check, but is only available when \constant{USE_STACKCHECK} |
| 36 | is defined (currently on Windows using the Microsoft Visual \Cpp{} |
| 37 | compiler and on the Macintosh). \constant{USE_STACKCHECK} will be |
| 38 | defined automatically; you should never change the definition in |
| 39 | your own code. |
| 40 | \end{cfuncdesc} |
| 41 | |
| 42 | \begin{cfuncdesc}{PyOS_sighandler_t}{PyOS_getsig}{int i} |
| 43 | Return the current signal handler for signal \var{i}. This is a |
| 44 | thin wrapper around either \cfunction{sigaction()} or |
| 45 | \cfunction{signal()}. Do not call those functions directly! |
| 46 | \ctype{PyOS_sighandler_t} is a typedef alias for \ctype{void |
| 47 | (*)(int)}. |
| 48 | \end{cfuncdesc} |
| 49 | |
| 50 | \begin{cfuncdesc}{PyOS_sighandler_t}{PyOS_setsig}{int i, PyOS_sighandler_t h} |
| 51 | Set the signal handler for signal \var{i} to be \var{h}; return the |
| 52 | old signal handler. This is a thin wrapper around either |
| 53 | \cfunction{sigaction()} or \cfunction{signal()}. Do not call those |
| 54 | functions directly! \ctype{PyOS_sighandler_t} is a typedef alias |
| 55 | for \ctype{void (*)(int)}. |
| 56 | \end{cfuncdesc} |
| 57 | |
| 58 | |
| 59 | \section{Process Control \label{processControl}} |
| 60 | |
| 61 | \begin{cfuncdesc}{void}{Py_FatalError}{char *message} |
| 62 | Print a fatal error message and kill the process. No cleanup is |
| 63 | performed. This function should only be invoked when a condition is |
| 64 | detected that would make it dangerous to continue using the Python |
| 65 | interpreter; e.g., when the object administration appears to be |
| 66 | corrupted. On \UNIX, the standard C library function |
| 67 | \cfunction{abort()}\ttindex{abort()} is called which will attempt to |
| 68 | produce a \file{core} file. |
| 69 | \end{cfuncdesc} |
| 70 | |
| 71 | \begin{cfuncdesc}{void}{Py_Exit}{int status} |
| 72 | Exit the current process. This calls |
| 73 | \cfunction{Py_Finalize()}\ttindex{Py_Finalize()} and then calls the |
| 74 | standard C library function |
| 75 | \code{exit(\var{status})}\ttindex{exit()}. |
| 76 | \end{cfuncdesc} |
| 77 | |
| 78 | \begin{cfuncdesc}{int}{Py_AtExit}{void (*func) ()} |
| 79 | Register a cleanup function to be called by |
| 80 | \cfunction{Py_Finalize()}\ttindex{Py_Finalize()}. The cleanup |
| 81 | function will be called with no arguments and should return no |
| 82 | value. At most 32 \index{cleanup functions}cleanup functions can be |
| 83 | registered. When the registration is successful, |
| 84 | \cfunction{Py_AtExit()} returns \code{0}; on failure, it returns |
| 85 | \code{-1}. The cleanup function registered last is called first. |
| 86 | Each cleanup function will be called at most once. Since Python's |
| 87 | internal finalization will have completed before the cleanup |
| 88 | function, no Python APIs should be called by \var{func}. |
| 89 | \end{cfuncdesc} |
| 90 | |
| 91 | |
| 92 | \section{Importing Modules \label{importing}} |
| 93 | |
| 94 | \begin{cfuncdesc}{PyObject*}{PyImport_ImportModule}{char *name} |
| 95 | This is a simplified interface to |
| 96 | \cfunction{PyImport_ImportModuleEx()} below, leaving the |
| 97 | \var{globals} and \var{locals} arguments set to \NULL. When the |
| 98 | \var{name} argument contains a dot (when it specifies a submodule of |
| 99 | a package), the \var{fromlist} argument is set to the list |
| 100 | \code{['*']} so that the return value is the named module rather |
| 101 | than the top-level package containing it as would otherwise be the |
| 102 | case. (Unfortunately, this has an additional side effect when |
| 103 | \var{name} in fact specifies a subpackage instead of a submodule: |
| 104 | the submodules specified in the package's \code{__all__} variable |
| 105 | are \index{package variable!\code{__all__}} |
| 106 | \withsubitem{(package variable)}{\ttindex{__all__}}loaded.) Return |
| 107 | a new reference to the imported module, or \NULL{} with an exception |
| 108 | set on failure (the module may still be created in this case --- |
| 109 | examine \code{sys.modules} to find out). |
| 110 | \withsubitem{(in module sys)}{\ttindex{modules}} |
| 111 | \end{cfuncdesc} |
| 112 | |
| 113 | \begin{cfuncdesc}{PyObject*}{PyImport_ImportModuleEx}{char *name, |
| 114 | PyObject *globals, PyObject *locals, PyObject *fromlist} |
| 115 | Import a module. This is best described by referring to the |
| 116 | built-in Python function |
| 117 | \function{__import__()}\bifuncindex{__import__}, as the standard |
| 118 | \function{__import__()} function calls this function directly. |
| 119 | |
| 120 | The return value is a new reference to the imported module or |
| 121 | top-level package, or \NULL{} with an exception set on failure (the |
| 122 | module may still be created in this case). Like for |
| 123 | \function{__import__()}, the return value when a submodule of a |
| 124 | package was requested is normally the top-level package, unless a |
| 125 | non-empty \var{fromlist} was given. |
| 126 | \end{cfuncdesc} |
| 127 | |
| 128 | \begin{cfuncdesc}{PyObject*}{PyImport_Import}{PyObject *name} |
| 129 | This is a higher-level interface that calls the current ``import |
| 130 | hook function''. It invokes the \function{__import__()} function |
| 131 | from the \code{__builtins__} of the current globals. This means |
| 132 | that the import is done using whatever import hooks are installed in |
| 133 | the current environment, e.g.\ by \module{rexec}\refstmodindex{rexec} |
| 134 | or \module{ihooks}\refstmodindex{ihooks}. |
| 135 | \end{cfuncdesc} |
| 136 | |
| 137 | \begin{cfuncdesc}{PyObject*}{PyImport_ReloadModule}{PyObject *m} |
| 138 | Reload a module. This is best described by referring to the |
| 139 | built-in Python function \function{reload()}\bifuncindex{reload}, as |
| 140 | the standard \function{reload()} function calls this function |
| 141 | directly. Return a new reference to the reloaded module, or \NULL{} |
| 142 | with an exception set on failure (the module still exists in this |
| 143 | case). |
| 144 | \end{cfuncdesc} |
| 145 | |
| 146 | \begin{cfuncdesc}{PyObject*}{PyImport_AddModule}{char *name} |
| 147 | Return the module object corresponding to a module name. The |
| 148 | \var{name} argument may be of the form \code{package.module}. |
| 149 | First check the modules dictionary if there's one there, and if not, |
| 150 | create a new one and insert it in the modules dictionary. |
| 151 | \note{This function does not load or import the module; if the |
| 152 | module wasn't already loaded, you will get an empty module object. |
| 153 | Use \cfunction{PyImport_ImportModule()} or one of its variants to |
| 154 | import a module. Return \NULL{} with an exception set on failure.} |
| 155 | \end{cfuncdesc} |
| 156 | |
| 157 | \begin{cfuncdesc}{PyObject*}{PyImport_ExecCodeModule}{char *name, PyObject *co} |
| 158 | Given a module name (possibly of the form \code{package.module}) and |
| 159 | a code object read from a Python bytecode file or obtained from the |
| 160 | built-in function \function{compile()}\bifuncindex{compile}, load |
| 161 | the module. Return a new reference to the module object, or \NULL{} |
| 162 | with an exception set if an error occurred (the module may still be |
| 163 | created in this case). (This function would reload the module if it |
| 164 | was already imported.) |
| 165 | \end{cfuncdesc} |
| 166 | |
| 167 | \begin{cfuncdesc}{long}{PyImport_GetMagicNumber}{} |
| 168 | Return the magic number for Python bytecode files |
| 169 | (a.k.a.\ \file{.pyc} and \file{.pyo} files). The magic number should |
| 170 | be present in the first four bytes of the bytecode file, in |
| 171 | little-endian byte order. |
| 172 | \end{cfuncdesc} |
| 173 | |
| 174 | \begin{cfuncdesc}{PyObject*}{PyImport_GetModuleDict}{} |
| 175 | Return the dictionary used for the module administration |
| 176 | (a.k.a.\ \code{sys.modules}). Note that this is a per-interpreter |
| 177 | variable. |
| 178 | \end{cfuncdesc} |
| 179 | |
| 180 | \begin{cfuncdesc}{void}{_PyImport_Init}{} |
| 181 | Initialize the import mechanism. For internal use only. |
| 182 | \end{cfuncdesc} |
| 183 | |
| 184 | \begin{cfuncdesc}{void}{PyImport_Cleanup}{} |
| 185 | Empty the module table. For internal use only. |
| 186 | \end{cfuncdesc} |
| 187 | |
| 188 | \begin{cfuncdesc}{void}{_PyImport_Fini}{} |
| 189 | Finalize the import mechanism. For internal use only. |
| 190 | \end{cfuncdesc} |
| 191 | |
| 192 | \begin{cfuncdesc}{PyObject*}{_PyImport_FindExtension}{char *, char *} |
| 193 | For internal use only. |
| 194 | \end{cfuncdesc} |
| 195 | |
| 196 | \begin{cfuncdesc}{PyObject*}{_PyImport_FixupExtension}{char *, char *} |
| 197 | For internal use only. |
| 198 | \end{cfuncdesc} |
| 199 | |
| 200 | \begin{cfuncdesc}{int}{PyImport_ImportFrozenModule}{char *name} |
| 201 | Load a frozen module named \var{name}. Return \code{1} for success, |
| 202 | \code{0} if the module is not found, and \code{-1} with an exception |
| 203 | set if the initialization failed. To access the imported module on |
| 204 | a successful load, use \cfunction{PyImport_ImportModule()}. (Note |
| 205 | the misnomer --- this function would reload the module if it was |
| 206 | already imported.) |
| 207 | \end{cfuncdesc} |
| 208 | |
| 209 | \begin{ctypedesc}[_frozen]{struct _frozen} |
| 210 | This is the structure type definition for frozen module descriptors, |
| 211 | as generated by the \program{freeze}\index{freeze utility} utility |
| 212 | (see \file{Tools/freeze/} in the Python source distribution). Its |
| 213 | definition, found in \file{Include/import.h}, is: |
| 214 | |
| 215 | \begin{verbatim} |
| 216 | struct _frozen { |
| 217 | char *name; |
| 218 | unsigned char *code; |
| 219 | int size; |
| 220 | }; |
| 221 | \end{verbatim} |
| 222 | \end{ctypedesc} |
| 223 | |
| 224 | \begin{cvardesc}{struct _frozen*}{PyImport_FrozenModules} |
| 225 | This pointer is initialized to point to an array of \ctype{struct |
| 226 | _frozen} records, terminated by one whose members are all \NULL{} or |
| 227 | zero. When a frozen module is imported, it is searched for in this |
| 228 | table. Third-party code could play tricks with this to provide a |
| 229 | dynamically created collection of frozen modules. |
| 230 | \end{cvardesc} |
| 231 | |
| 232 | \begin{cfuncdesc}{int}{PyImport_AppendInittab}{char *name, |
| 233 | void (*initfunc)(void)} |
| 234 | Add a single module to the existing table of built-in modules. This |
| 235 | is a convenience wrapper around |
| 236 | \cfunction{PyImport_ExtendInittab()}, returning \code{-1} if the |
| 237 | table could not be extended. The new module can be imported by the |
| 238 | name \var{name}, and uses the function \var{initfunc} as the |
| 239 | initialization function called on the first attempted import. This |
| 240 | should be called before \cfunction{Py_Initialize()}. |
| 241 | \end{cfuncdesc} |
| 242 | |
| 243 | \begin{ctypedesc}[_inittab]{struct _inittab} |
| 244 | Structure describing a single entry in the list of built-in |
| 245 | modules. Each of these structures gives the name and initialization |
| 246 | function for a module built into the interpreter. Programs which |
| 247 | embed Python may use an array of these structures in conjunction |
| 248 | with \cfunction{PyImport_ExtendInittab()} to provide additional |
| 249 | built-in modules. The structure is defined in |
| 250 | \file{Include/import.h} as: |
| 251 | |
| 252 | \begin{verbatim} |
| 253 | struct _inittab { |
| 254 | char *name; |
| 255 | void (*initfunc)(void); |
| 256 | }; |
| 257 | \end{verbatim} |
| 258 | \end{ctypedesc} |
| 259 | |
| 260 | \begin{cfuncdesc}{int}{PyImport_ExtendInittab}{struct _inittab *newtab} |
| 261 | Add a collection of modules to the table of built-in modules. The |
| 262 | \var{newtab} array must end with a sentinel entry which contains |
| 263 | \NULL{} for the \member{name} field; failure to provide the sentinel |
| 264 | value can result in a memory fault. Returns \code{0} on success or |
| 265 | \code{-1} if insufficient memory could be allocated to extend the |
| 266 | internal table. In the event of failure, no modules are added to |
| 267 | the internal table. This should be called before |
| 268 | \cfunction{Py_Initialize()}. |
| 269 | \end{cfuncdesc} |
| 270 | |
| 271 | |
Fred Drake | 0fae49f | 2001-10-14 04:45:51 +0000 | [diff] [blame] | 272 | \section{Data marshalling support \label{marshalling-utils}} |
| 273 | |
| 274 | These routines allow C code to work with serialized objects using the |
| 275 | same data format as the \module{marshal} module. There are functions |
| 276 | to write data into the serialization format, and additional functions |
| 277 | that can be used to read the data back. Files used to store marshalled |
| 278 | data must be opened in binary mode. |
| 279 | |
| 280 | Numeric values are stored with the least significant byte first. |
| 281 | |
| 282 | \begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file} |
| 283 | Marshal a \ctype{long} integer, \var{value}, to \var{file}. This |
| 284 | will only write the least-significant 32 bits of \var{value}, |
| 285 | regardless of the size of the native \ctype{long} type. |
| 286 | \end{cfuncdesc} |
| 287 | |
| 288 | \begin{cfuncdesc}{void}{PyMarshal_WriteShortToFile}{short value, FILE *file} |
Fred Drake | b084017 | 2002-06-17 15:44:18 +0000 | [diff] [blame] | 289 | Marshal a \ctype{short} integer, \var{value}, to \var{file}. This |
| 290 | will only write the least-significant 16 bits of \var{value}, |
| 291 | regardless of the size of the native \ctype{short} type. |
Fred Drake | 0fae49f | 2001-10-14 04:45:51 +0000 | [diff] [blame] | 292 | \end{cfuncdesc} |
| 293 | |
| 294 | \begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value, |
| 295 | FILE *file} |
Fred Drake | b084017 | 2002-06-17 15:44:18 +0000 | [diff] [blame] | 296 | Marshal a Python object, \var{value}, to \var{file}. |
Fred Drake | 0fae49f | 2001-10-14 04:45:51 +0000 | [diff] [blame] | 297 | \end{cfuncdesc} |
| 298 | |
| 299 | \begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value} |
| 300 | Return a string object containing the marshalled representation of |
| 301 | \var{value}. |
| 302 | \end{cfuncdesc} |
| 303 | |
| 304 | The following functions allow marshalled values to be read back in. |
| 305 | |
| 306 | XXX What about error detection? It appears that reading past the end |
| 307 | of the file will always result in a negative numeric value (where |
| 308 | that's relevant), but it's not clear that negative values won't be |
| 309 | handled properly when there's no error. What's the right way to tell? |
| 310 | Should only non-negative values be written using these routines? |
| 311 | |
| 312 | \begin{cfuncdesc}{long}{PyMarshal_ReadLongFromFile}{FILE *file} |
| 313 | Return a C \ctype{long} from the data stream in a \ctype{FILE*} |
| 314 | opened for reading. Only a 32-bit value can be read in using |
| 315 | this function, regardless of the native size of \ctype{long}. |
| 316 | \end{cfuncdesc} |
| 317 | |
| 318 | \begin{cfuncdesc}{int}{PyMarshal_ReadShortFromFile}{FILE *file} |
| 319 | Return a C \ctype{short} from the data stream in a \ctype{FILE*} |
| 320 | opened for reading. Only a 16-bit value can be read in using |
Fred Drake | b084017 | 2002-06-17 15:44:18 +0000 | [diff] [blame] | 321 | this function, regardless of the native size of \ctype{short}. |
Fred Drake | 0fae49f | 2001-10-14 04:45:51 +0000 | [diff] [blame] | 322 | \end{cfuncdesc} |
| 323 | |
| 324 | \begin{cfuncdesc}{PyObject*}{PyMarshal_ReadObjectFromFile}{FILE *file} |
| 325 | Return a Python object from the data stream in a \ctype{FILE*} |
| 326 | opened for reading. On error, sets the appropriate exception |
| 327 | (\exception{EOFError} or \exception{TypeError}) and returns \NULL. |
| 328 | \end{cfuncdesc} |
| 329 | |
| 330 | \begin{cfuncdesc}{PyObject*}{PyMarshal_ReadLastObjectFromFile}{FILE *file} |
| 331 | Return a Python object from the data stream in a \ctype{FILE*} |
| 332 | opened for reading. Unlike |
| 333 | \cfunction{PyMarshal_ReadObjectFromFile()}, this function assumes |
| 334 | that no further objects will be read from the file, allowing it to |
| 335 | aggressively load file data into memory so that the de-serialization |
| 336 | can operate from data in memory rather than reading a byte at a time |
| 337 | from the file. Only use this variant if you are certain that you |
| 338 | won't be reading anything else from the file. On error, sets the |
| 339 | appropriate exception (\exception{EOFError} or |
| 340 | \exception{TypeError}) and returns \NULL. |
| 341 | \end{cfuncdesc} |
| 342 | |
| 343 | \begin{cfuncdesc}{PyObject*}{PyMarshal_ReadObjectFromString}{char *string, |
| 344 | int len} |
| 345 | Return a Python object from the data stream in a character buffer |
| 346 | containing \var{len} bytes pointed to by \var{string}. On error, |
| 347 | sets the appropriate exception (\exception{EOFError} or |
| 348 | \exception{TypeError}) and returns \NULL. |
| 349 | \end{cfuncdesc} |
| 350 | |
| 351 | |
Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 352 | \section{Parsing arguments and building values |
| 353 | \label{arg-parsing}} |
| 354 | |
| 355 | These functions are useful when creating your own extension functions |
| 356 | and methods. Additional information and examples are available in |
| 357 | \citetitle[../ext/ext.html]{Extending and Embedding the Python |
| 358 | Interpreter}. |
| 359 | |
Fred Drake | 68304cc | 2002-04-05 23:01:14 +0000 | [diff] [blame] | 360 | The first three of these functions described, |
| 361 | \cfunction{PyArg_ParseTuple()}, |
| 362 | \cfunction{PyArg_ParseTupleAndKeywords()}, and |
| 363 | \cfunction{PyArg_Parse()}, all use \emph{format strings} which are |
| 364 | used to tell the function about the expected arguments. The format |
| 365 | strings use the same syntax for each of these functions. |
| 366 | |
| 367 | A format string consists of zero or more ``format units.'' A format |
| 368 | unit describes one Python object; it is usually a single character or |
| 369 | a parenthesized sequence of format units. With a few exceptions, a |
| 370 | format unit that is not a parenthesized sequence normally corresponds |
| 371 | to a single address argument to these functions. In the following |
| 372 | description, the quoted form is the format unit; the entry in (round) |
| 373 | parentheses is the Python object type that matches the format unit; |
| 374 | and the entry in [square] brackets is the type of the C variable(s) |
| 375 | whose address should be passed. |
| 376 | |
| 377 | \begin{description} |
| 378 | \item[\samp{s} (string or Unicode object) {[char *]}] |
| 379 | Convert a Python string or Unicode object to a C pointer to a |
| 380 | character string. You must not provide storage for the string |
| 381 | itself; a pointer to an existing string is stored into the character |
| 382 | pointer variable whose address you pass. The C string is |
| 383 | NUL-terminated. The Python string must not contain embedded NUL |
| 384 | bytes; if it does, a \exception{TypeError} exception is raised. |
| 385 | Unicode objects are converted to C strings using the default |
| 386 | encoding. If this conversion fails, a \exception{UnicodeError} is |
| 387 | raised. |
| 388 | |
| 389 | \item[\samp{s\#} (string, Unicode or any read buffer compatible object) |
| 390 | {[char *, int]}] |
| 391 | This variant on \samp{s} stores into two C variables, the first one |
| 392 | a pointer to a character string, the second one its length. In this |
| 393 | case the Python string may contain embedded null bytes. Unicode |
| 394 | objects pass back a pointer to the default encoded string version of |
| 395 | the object if such a conversion is possible. All other read-buffer |
| 396 | compatible objects pass back a reference to the raw internal data |
| 397 | representation. |
| 398 | |
| 399 | \item[\samp{z} (string or \code{None}) {[char *]}] |
| 400 | Like \samp{s}, but the Python object may also be \code{None}, in |
| 401 | which case the C pointer is set to \NULL. |
| 402 | |
| 403 | \item[\samp{z\#} (string or \code{None} or any read buffer |
| 404 | compatible object) {[char *, int]}] |
| 405 | This is to \samp{s\#} as \samp{z} is to \samp{s}. |
| 406 | |
| 407 | \item[\samp{u} (Unicode object) {[Py_UNICODE *]}] |
| 408 | Convert a Python Unicode object to a C pointer to a NUL-terminated |
| 409 | buffer of 16-bit Unicode (UTF-16) data. As with \samp{s}, there is |
| 410 | no need to provide storage for the Unicode data buffer; a pointer to |
| 411 | the existing Unicode data is stored into the \ctype{Py_UNICODE} |
| 412 | pointer variable whose address you pass. |
| 413 | |
| 414 | \item[\samp{u\#} (Unicode object) {[Py_UNICODE *, int]}] |
| 415 | This variant on \samp{u} stores into two C variables, the first one |
| 416 | a pointer to a Unicode data buffer, the second one its length. |
| 417 | Non-Unicode objects are handled by interpreting their read-buffer |
| 418 | pointer as pointer to a \ctype{Py_UNICODE} array. |
| 419 | |
| 420 | \item[\samp{es} (string, Unicode object or character buffer |
| 421 | compatible object) {[const char *encoding, char **buffer]}] |
| 422 | This variant on \samp{s} is used for encoding Unicode and objects |
| 423 | convertible to Unicode into a character buffer. It only works for |
| 424 | encoded data without embedded NUL bytes. |
| 425 | |
| 426 | This format requires two arguments. The first is only used as |
| 427 | input, and must be a \ctype{char*} which points to the name of an |
| 428 | encoding as a NUL-terminated string, or \NULL, in which case the |
| 429 | default encoding is used. An exception is raised if the named |
| 430 | encoding is not known to Python. The second argument must be a |
| 431 | \ctype{char**}; the value of the pointer it references will be set |
| 432 | to a buffer with the contents of the argument text. The text will |
| 433 | be encoded in the encoding specified by the first argument. |
| 434 | |
| 435 | \cfunction{PyArg_ParseTuple()} will allocate a buffer of the needed |
| 436 | size, copy the encoded data into this buffer and adjust |
| 437 | \var{*buffer} to reference the newly allocated storage. The caller |
| 438 | is responsible for calling \cfunction{PyMem_Free()} to free the |
| 439 | allocated buffer after use. |
| 440 | |
| 441 | \item[\samp{et} (string, Unicode object or character buffer |
| 442 | compatible object) {[const char *encoding, char **buffer]}] |
| 443 | Same as \samp{es} except that 8-bit string objects are passed |
| 444 | through without recoding them. Instead, the implementation assumes |
| 445 | that the string object uses the encoding passed in as parameter. |
| 446 | |
| 447 | \item[\samp{es\#} (string, Unicode object or character buffer compatible |
| 448 | object) {[const char *encoding, char **buffer, int *buffer_length]}] |
| 449 | This variant on \samp{s\#} is used for encoding Unicode and objects |
| 450 | convertible to Unicode into a character buffer. Unlike the |
| 451 | \samp{es} format, this variant allows input data which contains NUL |
| 452 | characters. |
| 453 | |
| 454 | It requires three arguments. The first is only used as input, and |
| 455 | must be a \ctype{char*} which points to the name of an encoding as a |
| 456 | NUL-terminated string, or \NULL, in which case the default encoding |
| 457 | is used. An exception is raised if the named encoding is not known |
| 458 | to Python. The second argument must be a \ctype{char**}; the value |
| 459 | of the pointer it references will be set to a buffer with the |
| 460 | contents of the argument text. The text will be encoded in the |
| 461 | encoding specified by the first argument. The third argument must |
| 462 | be a pointer to an integer; the referenced integer will be set to |
| 463 | the number of bytes in the output buffer. |
| 464 | |
| 465 | There are two modes of operation: |
| 466 | |
| 467 | If \var{*buffer} points to a \NULL{} pointer, the function will |
| 468 | allocate a buffer of the needed size, copy the encoded data into |
| 469 | this buffer and set \var{*buffer} to reference the newly allocated |
| 470 | storage. The caller is responsible for calling |
| 471 | \cfunction{PyMem_Free()} to free the allocated buffer after usage. |
| 472 | |
| 473 | If \var{*buffer} points to a non-\NULL{} pointer (an already |
| 474 | allocated buffer), \cfunction{PyArg_ParseTuple()} will use this |
| 475 | location as the buffer and interpret the initial value of |
| 476 | \var{*buffer_length} as the buffer size. It will then copy the |
| 477 | encoded data into the buffer and NUL-terminate it. If the buffer |
| 478 | is not large enough, a \exception{ValueError} will be set. |
| 479 | |
| 480 | In both cases, \var{*buffer_length} is set to the length of the |
| 481 | encoded data without the trailing NUL byte. |
| 482 | |
| 483 | \item[\samp{et\#} (string, Unicode object or character buffer compatible |
| 484 | object) {[const char *encoding, char **buffer, int *buffer_length]}] |
| 485 | Same as \samp{es\#} except that string objects are passed through |
| 486 | without recoding them. Instead, the implementation assumes that the |
| 487 | string object uses the encoding passed in as parameter. |
| 488 | |
| 489 | \item[\samp{b} (integer) {[char]}] |
| 490 | Convert a Python integer to a tiny int, stored in a C \ctype{char}. |
| 491 | |
| 492 | \item[\samp{h} (integer) {[short int]}] |
| 493 | Convert a Python integer to a C \ctype{short int}. |
| 494 | |
| 495 | \item[\samp{i} (integer) {[int]}] |
| 496 | Convert a Python integer to a plain C \ctype{int}. |
| 497 | |
| 498 | \item[\samp{l} (integer) {[long int]}] |
| 499 | Convert a Python integer to a C \ctype{long int}. |
| 500 | |
| 501 | \item[\samp{L} (integer) {[LONG_LONG]}] |
| 502 | Convert a Python integer to a C \ctype{long long}. This format is |
| 503 | only available on platforms that support \ctype{long long} (or |
| 504 | \ctype{__int64} on Windows). |
| 505 | |
| 506 | \item[\samp{c} (string of length 1) {[char]}] |
| 507 | Convert a Python character, represented as a string of length 1, to |
| 508 | a C \ctype{char}. |
| 509 | |
| 510 | \item[\samp{f} (float) {[float]}] |
| 511 | Convert a Python floating point number to a C \ctype{float}. |
| 512 | |
| 513 | \item[\samp{d} (float) {[double]}] |
| 514 | Convert a Python floating point number to a C \ctype{double}. |
| 515 | |
| 516 | \item[\samp{D} (complex) {[Py_complex]}] |
| 517 | Convert a Python complex number to a C \ctype{Py_complex} structure. |
| 518 | |
| 519 | \item[\samp{O} (object) {[PyObject *]}] |
| 520 | Store a Python object (without any conversion) in a C object |
| 521 | pointer. The C program thus receives the actual object that was |
| 522 | passed. The object's reference count is not increased. The pointer |
| 523 | stored is not \NULL. |
| 524 | |
| 525 | \item[\samp{O!} (object) {[\var{typeobject}, PyObject *]}] |
| 526 | Store a Python object in a C object pointer. This is similar to |
| 527 | \samp{O}, but takes two C arguments: the first is the address of a |
| 528 | Python type object, the second is the address of the C variable (of |
| 529 | type \ctype{PyObject*}) into which the object pointer is stored. If |
| 530 | the Python object does not have the required type, |
| 531 | \exception{TypeError} is raised. |
| 532 | |
| 533 | \item[\samp{O\&} (object) {[\var{converter}, \var{anything}]}] |
| 534 | Convert a Python object to a C variable through a \var{converter} |
| 535 | function. This takes two arguments: the first is a function, the |
| 536 | second is the address of a C variable (of arbitrary type), converted |
| 537 | to \ctype{void *}. The \var{converter} function in turn is called |
| 538 | as follows: |
| 539 | |
| 540 | \var{status}\code{ = }\var{converter}\code{(}\var{object}, |
| 541 | \var{address}\code{);} |
| 542 | |
| 543 | where \var{object} is the Python object to be converted and |
| 544 | \var{address} is the \ctype{void*} argument that was passed to the |
| 545 | \cfunction{PyArg_Parse*()} function. The returned \var{status} |
| 546 | should be \code{1} for a successful conversion and \code{0} if the |
| 547 | conversion has failed. When the conversion fails, the |
| 548 | \var{converter} function should raise an exception. |
| 549 | |
| 550 | \item[\samp{S} (string) {[PyStringObject *]}] |
| 551 | Like \samp{O} but requires that the Python object is a string |
| 552 | object. Raises \exception{TypeError} if the object is not a string |
| 553 | object. The C variable may also be declared as \ctype{PyObject*}. |
| 554 | |
| 555 | \item[\samp{U} (Unicode string) {[PyUnicodeObject *]}] |
| 556 | Like \samp{O} but requires that the Python object is a Unicode |
| 557 | object. Raises \exception{TypeError} if the object is not a Unicode |
| 558 | object. The C variable may also be declared as \ctype{PyObject*}. |
| 559 | |
| 560 | \item[\samp{t\#} (read-only character buffer) {[char *, int]}] |
| 561 | Like \samp{s\#}, but accepts any object which implements the |
| 562 | read-only buffer interface. The \ctype{char*} variable is set to |
| 563 | point to the first byte of the buffer, and the \ctype{int} is set to |
| 564 | the length of the buffer. Only single-segment buffer objects are |
| 565 | accepted; \exception{TypeError} is raised for all others. |
| 566 | |
| 567 | \item[\samp{w} (read-write character buffer) {[char *]}] |
| 568 | Similar to \samp{s}, but accepts any object which implements the |
| 569 | read-write buffer interface. The caller must determine the length |
| 570 | of the buffer by other means, or use \samp{w\#} instead. Only |
| 571 | single-segment buffer objects are accepted; \exception{TypeError} is |
| 572 | raised for all others. |
| 573 | |
| 574 | \item[\samp{w\#} (read-write character buffer) {[char *, int]}] |
| 575 | Like \samp{s\#}, but accepts any object which implements the |
| 576 | read-write buffer interface. The \ctype{char *} variable is set to |
| 577 | point to the first byte of the buffer, and the \ctype{int} is set to |
| 578 | the length of the buffer. Only single-segment buffer objects are |
| 579 | accepted; \exception{TypeError} is raised for all others. |
| 580 | |
| 581 | \item[\samp{(\var{items})} (tuple) {[\var{matching-items}]}] |
| 582 | The object must be a Python sequence whose length is the number of |
| 583 | format units in \var{items}. The C arguments must correspond to the |
| 584 | individual format units in \var{items}. Format units for sequences |
| 585 | may be nested. |
| 586 | |
| 587 | \note{Prior to Python version 1.5.2, this format specifier only |
| 588 | accepted a tuple containing the individual parameters, not an |
| 589 | arbitrary sequence. Code which previously caused |
| 590 | \exception{TypeError} to be raised here may now proceed without an |
| 591 | exception. This is not expected to be a problem for existing code.} |
| 592 | \end{description} |
| 593 | |
| 594 | It is possible to pass Python long integers where integers are |
| 595 | requested; however, no proper range checking is done --- the most |
| 596 | significant bits are silently truncated when the receiving field is |
| 597 | too small to receive the value (actually, the semantics are inherited |
| 598 | from downcasts in C --- your mileage may vary). |
| 599 | |
| 600 | A few other characters have a meaning in a format string. These may |
| 601 | not occur inside nested parentheses. They are: |
| 602 | |
| 603 | \begin{description} |
| 604 | \item[\samp{|}] |
| 605 | Indicates that the remaining arguments in the Python argument list |
| 606 | are optional. The C variables corresponding to optional arguments |
| 607 | should be initialized to their default value --- when an optional |
| 608 | argument is not specified, \cfunction{PyArg_ParseTuple()} does not |
| 609 | touch the contents of the corresponding C variable(s). |
| 610 | |
| 611 | \item[\samp{:}] |
| 612 | The list of format units ends here; the string after the colon is |
| 613 | used as the function name in error messages (the ``associated |
| 614 | value'' of the exception that \cfunction{PyArg_ParseTuple()} |
| 615 | raises). |
| 616 | |
| 617 | \item[\samp{;}] |
| 618 | The list of format units ends here; the string after the semicolon |
| 619 | is used as the error message \emph{instead} of the default error |
| 620 | message. The \samp{:} and \samp{;} characters are mutually |
| 621 | exclusive. |
| 622 | \end{description} |
| 623 | |
| 624 | Note that any Python object references which are provided to the |
| 625 | caller are \emph{borrowed} references; do not decrement their |
| 626 | reference count! |
| 627 | |
| 628 | Additional arguments passed to these functions must be addresses of |
| 629 | variables whose type is determined by the format string; these are |
| 630 | used to store values from the input tuple. There are a few cases, as |
| 631 | described in the list of format units above, where these parameters |
| 632 | are used as input values; they should match what is specified for the |
| 633 | corresponding format unit in that case. |
| 634 | |
| 635 | For the conversion to succeed, the \var{args} object must match the |
| 636 | format and the format must be exhausted. On success, the |
| 637 | \cfunction{PyArg_Parse*()} functions return true, otherwise they |
| 638 | return false and raise an appropriate exception. |
| 639 | |
Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 640 | \begin{cfuncdesc}{int}{PyArg_ParseTuple}{PyObject *args, char *format, |
| 641 | \moreargs} |
| 642 | Parse the parameters of a function that takes only positional |
| 643 | parameters into local variables. Returns true on success; on |
Fred Drake | 68304cc | 2002-04-05 23:01:14 +0000 | [diff] [blame] | 644 | failure, it returns false and raises the appropriate exception. |
Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 645 | \end{cfuncdesc} |
| 646 | |
| 647 | \begin{cfuncdesc}{int}{PyArg_ParseTupleAndKeywords}{PyObject *args, |
| 648 | PyObject *kw, char *format, char *keywords[], |
| 649 | \moreargs} |
| 650 | Parse the parameters of a function that takes both positional and |
| 651 | keyword parameters into local variables. Returns true on success; |
| 652 | on failure, it returns false and raises the appropriate exception. |
Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 653 | \end{cfuncdesc} |
| 654 | |
| 655 | \begin{cfuncdesc}{int}{PyArg_Parse}{PyObject *args, char *format, |
| 656 | \moreargs} |
| 657 | Function used to deconstruct the argument lists of ``old-style'' |
| 658 | functions --- these are functions which use the |
| 659 | \constant{METH_OLDARGS} parameter parsing method. This is not |
| 660 | recommended for use in parameter parsing in new code, and most code |
| 661 | in the standard interpreter has been modified to no longer use this |
| 662 | for that purpose. It does remain a convenient way to decompose |
| 663 | other tuples, however, and may continue to be used for that |
| 664 | purpose. |
| 665 | \end{cfuncdesc} |
| 666 | |
Fred Drake | c84f2c5 | 2001-10-23 21:10:18 +0000 | [diff] [blame] | 667 | \begin{cfuncdesc}{int}{PyArg_UnpackTuple}{PyObject *args, char *name, |
| 668 | int min, int max, \moreargs} |
| 669 | A simpler form of parameter retrieval which does not use a format |
| 670 | string to specify the types of the arguments. Functions which use |
| 671 | this method to retrieve their parameters should be declared as |
| 672 | \constant{METH_VARARGS} in function or method tables. The tuple |
| 673 | containing the actual parameters should be passed as \var{args}; it |
| 674 | must actually be a tuple. The length of the tuple must be at least |
| 675 | \var{min} and no more than \var{max}; \var{min} and \var{max} may be |
| 676 | equal. Additional arguments must be passed to the function, each of |
| 677 | which should be a pointer to a \ctype{PyObject*} variable. These |
| 678 | variables will be filled in with the values from \var{args} and will contain |
| 679 | borrowed references. The variables which correspond to optional |
| 680 | parameters not given by \var{args} will not be filled in; these |
| 681 | should be initialized by the caller. |
| 682 | This function returns true on success and false if \var{args} is not |
| 683 | a tuple or contains the wrong number of elements; an exception will |
| 684 | be set if there was a failure. |
| 685 | |
| 686 | This is an example of the use of this function, taken from the |
| 687 | sources for the \module{_weakref} helper module for weak references: |
| 688 | |
| 689 | \begin{verbatim} |
| 690 | static PyObject * |
| 691 | weakref_ref(PyObject *self, PyObject *args) |
| 692 | { |
| 693 | PyObject *object; |
| 694 | PyObject *callback = NULL; |
| 695 | PyObject *result = NULL; |
| 696 | |
| 697 | if (PyArg_UnpackTuple(args, "ref", 1, 2, &object, &callback)) { |
| 698 | result = PyWeakref_NewRef(object, callback); |
| 699 | } |
| 700 | return result; |
| 701 | } |
| 702 | \end{verbatim} |
| 703 | |
| 704 | The call to \cfunction{PyArg_UnpackTuple()} in this example is |
| 705 | entirely equivalent to this call to \cfunction{PyArg_ParseTuple()}: |
| 706 | |
| 707 | \begin{verbatim} |
| 708 | PyArg_ParseTuple(args, "O|O:ref", &object, &callback) |
| 709 | \end{verbatim} |
| 710 | |
| 711 | \versionadded{2.2} |
| 712 | \end{cfuncdesc} |
| 713 | |
Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 714 | \begin{cfuncdesc}{PyObject*}{Py_BuildValue}{char *format, |
| 715 | \moreargs} |
| 716 | Create a new value based on a format string similar to those |
| 717 | accepted by the \cfunction{PyArg_Parse*()} family of functions and a |
| 718 | sequence of values. Returns the value or \NULL{} in the case of an |
Fred Drake | 68304cc | 2002-04-05 23:01:14 +0000 | [diff] [blame] | 719 | error; an exception will be raised if \NULL{} is returned. |
| 720 | |
| 721 | \cfunction{Py_BuildValue()} does not always build a tuple. It |
| 722 | builds a tuple only if its format string contains two or more format |
| 723 | units. If the format string is empty, it returns \code{None}; if it |
| 724 | contains exactly one format unit, it returns whatever object is |
| 725 | described by that format unit. To force it to return a tuple of |
| 726 | size 0 or 1, parenthesize the format string. |
| 727 | |
| 728 | When memory buffers are passed as parameters to supply data to build |
| 729 | objects, as for the \samp{s} and \samp{s\#} formats, the required |
| 730 | data is copied. Buffers provided by the caller are never referenced |
| 731 | by the objects created by \cfunction{Py_BuildValue()}. In other |
| 732 | words, if your code invokes \cfunction{malloc()} and passes the |
| 733 | allocated memory to \cfunction{Py_BuildValue()}, your code is |
| 734 | responsible for calling \cfunction{free()} for that memory once |
| 735 | \cfunction{Py_BuildValue()} returns. |
| 736 | |
| 737 | In the following description, the quoted form is the format unit; |
| 738 | the entry in (round) parentheses is the Python object type that the |
| 739 | format unit will return; and the entry in [square] brackets is the |
| 740 | type of the C value(s) to be passed. |
| 741 | |
| 742 | The characters space, tab, colon and comma are ignored in format |
| 743 | strings (but not within format units such as \samp{s\#}). This can |
| 744 | be used to make long format strings a tad more readable. |
| 745 | |
| 746 | \begin{description} |
| 747 | \item[\samp{s} (string) {[char *]}] |
| 748 | Convert a null-terminated C string to a Python object. If the C |
| 749 | string pointer is \NULL, \code{None} is used. |
| 750 | |
| 751 | \item[\samp{s\#} (string) {[char *, int]}] |
| 752 | Convert a C string and its length to a Python object. If the C |
| 753 | string pointer is \NULL, the length is ignored and \code{None} is |
| 754 | returned. |
| 755 | |
| 756 | \item[\samp{z} (string or \code{None}) {[char *]}] |
| 757 | Same as \samp{s}. |
| 758 | |
| 759 | \item[\samp{z\#} (string or \code{None}) {[char *, int]}] |
| 760 | Same as \samp{s\#}. |
| 761 | |
| 762 | \item[\samp{u} (Unicode string) {[Py_UNICODE *]}] |
| 763 | Convert a null-terminated buffer of Unicode (UCS-2) data to a |
| 764 | Python Unicode object. If the Unicode buffer pointer is \NULL, |
| 765 | \code{None} is returned. |
| 766 | |
| 767 | \item[\samp{u\#} (Unicode string) {[Py_UNICODE *, int]}] |
| 768 | Convert a Unicode (UCS-2) data buffer and its length to a Python |
| 769 | Unicode object. If the Unicode buffer pointer is \NULL, the |
| 770 | length is ignored and \code{None} is returned. |
| 771 | |
| 772 | \item[\samp{i} (integer) {[int]}] |
| 773 | Convert a plain C \ctype{int} to a Python integer object. |
| 774 | |
| 775 | \item[\samp{b} (integer) {[char]}] |
| 776 | Same as \samp{i}. |
| 777 | |
| 778 | \item[\samp{h} (integer) {[short int]}] |
| 779 | Same as \samp{i}. |
| 780 | |
| 781 | \item[\samp{l} (integer) {[long int]}] |
| 782 | Convert a C \ctype{long int} to a Python integer object. |
| 783 | |
| 784 | \item[\samp{c} (string of length 1) {[char]}] |
| 785 | Convert a C \ctype{int} representing a character to a Python |
| 786 | string of length 1. |
| 787 | |
| 788 | \item[\samp{d} (float) {[double]}] |
| 789 | Convert a C \ctype{double} to a Python floating point number. |
| 790 | |
| 791 | \item[\samp{f} (float) {[float]}] |
| 792 | Same as \samp{d}. |
| 793 | |
| 794 | \item[\samp{D} (complex) {[Py_complex *]}] |
| 795 | Convert a C \ctype{Py_complex} structure to a Python complex |
| 796 | number. |
| 797 | |
| 798 | \item[\samp{O} (object) {[PyObject *]}] |
| 799 | Pass a Python object untouched (except for its reference count, |
| 800 | which is incremented by one). If the object passed in is a |
| 801 | \NULL{} pointer, it is assumed that this was caused because the |
| 802 | call producing the argument found an error and set an exception. |
| 803 | Therefore, \cfunction{Py_BuildValue()} will return \NULL{} without |
| 804 | raising a new exception. If no exception has been raised yet, however, |
| 805 | \exception{SystemError} is set. |
| 806 | |
| 807 | \item[\samp{S} (object) {[PyObject *]}] |
| 808 | Same as \samp{O}. |
| 809 | |
| 810 | \item[\samp{U} (object) {[PyObject *]}] |
| 811 | Same as \samp{O}. |
| 812 | |
| 813 | \item[\samp{N} (object) {[PyObject *]}] |
| 814 | Same as \samp{O}, except it doesn't increment the reference count |
| 815 | on the object. Useful when the object is created by a call to an |
| 816 | object constructor in the argument list. |
| 817 | |
| 818 | \item[\samp{O\&} (object) {[\var{converter}, \var{anything}]}] |
| 819 | Convert \var{anything} to a Python object through a |
| 820 | \var{converter} function. The function is called with |
| 821 | \var{anything} (which should be compatible with \ctype{void *}) as |
| 822 | its argument and should return a ``new'' Python object, or \NULL{} |
| 823 | if an error occurred. |
| 824 | |
| 825 | \item[\samp{(\var{items})} (tuple) {[\var{matching-items}]}] |
| 826 | Convert a sequence of C values to a Python tuple with the same |
| 827 | number of items. |
| 828 | |
| 829 | \item[\samp{[\var{items}]} (list) {[\var{matching-items}]}] |
| 830 | Convert a sequence of C values to a Python list with the same |
| 831 | number of items. |
| 832 | |
| 833 | \item[\samp{\{\var{items}\}} (dictionary) {[\var{matching-items}]}] |
| 834 | Convert a sequence of C values to a Python dictionary. Each pair |
| 835 | of consecutive C values adds one item to the dictionary, serving |
| 836 | as key and value, respectively. |
| 837 | |
| 838 | \end{description} |
| 839 | |
| 840 | If there is an error in the format string, the |
| 841 | \exception{SystemError} exception is set and \NULL{} is returned. |
Fred Drake | 3adf79e | 2001-10-12 19:01:43 +0000 | [diff] [blame] | 842 | \end{cfuncdesc} |